diff --git a/.clang-format b/.clang-format index 86c20ee744de..48405c54ef27 100644 --- a/.clang-format +++ b/.clang-format @@ -722,6 +722,13 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf' - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' + - 'vkms_config_for_each_connector' + - 'vkms_config_for_each_crtc' + - 'vkms_config_for_each_encoder' + - 'vkms_config_for_each_plane' + - 'vkms_config_connector_for_each_possible_encoder' + - 'vkms_config_encoder_for_each_possible_crtc' + - 'vkms_config_plane_for_each_possible_crtc' - 'while_for_each_ftrace_op' - 'workloads__for_each' - 'xa_for_each' diff --git a/Documentation/ABI/stable/sysfs-class-backlight b/Documentation/ABI/stable/sysfs-class-backlight index 6102d6bebdf9..40b8c46b95b2 100644 --- a/Documentation/ABI/stable/sysfs-class-backlight +++ b/Documentation/ABI/stable/sysfs-class-backlight @@ -26,7 +26,12 @@ Date: March 2006 KernelVersion: 2.6.17 Contact: Richard Purdie Description: - Show the actual brightness by querying the hardware. + Show the actual brightness by querying the hardware. Due + to implementation differences in hardware this may not + match the value in 'brightness'. For example some hardware + may treat blanking differently or have custom power saving + features. Userspace should generally use the values in + 'brightness' to make decisions. Users: HAL What: /sys/class/backlight//max_brightness diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index cb207c79680d..4ca917ac6382 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan1_input +Date: March 2025 +KernelVersion: 6.16 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 1 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan2_input +Date: March 2025 +KernelVersion: 6.16 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 2 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan3_input +Date: March 2025 +KernelVersion: 6.16 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 3 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index c787f72957a4..4b7f3646ec6c 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -647,6 +647,38 @@ Examples:: %p4cc Y10 little-endian (0x20303159) %p4cc NV12 big-endian (0xb231564e) +Generic FourCC code +------------------- + +:: + %p4c[h[R]lb] gP00 (0x67503030) + +Print a generic FourCC code, as both ASCII characters and its numerical +value as hexadecimal. + +The generic FourCC code is always printed in the big-endian format, +the most significant byte first. This is the opposite of V4L/DRM FourCCs. + +The additional ``h``, ``hR``, ``l``, and ``b`` specifiers define what +endianness is used to load the stored bytes. The data might be interpreted +using the host, reversed host byte order, little-endian, or big-endian. + +Passed by reference. + +Examples for a little-endian machine, given &(u32)0x67503030:: + + %p4ch gP00 (0x67503030) + %p4chR 00Pg (0x30305067) + %p4cl gP00 (0x67503030) + %p4cb 00Pg (0x30305067) + +Examples for a big-endian machine, given &(u32)0x67503030:: + + %p4ch gP00 (0x67503030) + %p4chR 00Pg (0x30305067) + %p4cl 00Pg (0x30305067) + %p4cb gP00 (0x67503030) + Rust ---- diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml new file mode 100644 index 000000000000..bde4dc556d4f --- /dev/null +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek HDMI MT8195 series HDMI Display Data Channel (DDC) + +maintainers: + - AngeloGioacchino Del Regno + - CK Hu + +properties: + compatible: + oneOf: + - const: mediatek,mt8195-hdmi-ddc + - items: + - const: mediatek,mt8188-hdmi-ddc + - const: mediatek,mt8195-hdmi-ddc + + clocks: + maxItems: 1 + + power-domains: + maxItems: 1 + +required: + - compatible + - clocks + +additionalProperties: false + +examples: + - | + hdmi { + hdmi_ddc: i2c { + compatible = "mediatek,mt8195-hdmi-ddc"; + clocks = <&clk26m>; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml new file mode 100644 index 000000000000..1b382f99d3ce --- /dev/null +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,mt8195-hdmi.yaml @@ -0,0 +1,151 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/mediatek/mediatek,mt8195-hdmi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek MT8195 series HDMI-TX Encoder + +maintainers: + - AngeloGioacchino Del Regno + - CK Hu + +description: + The MediaTek HDMI-TX v2 encoder can generate HDMI format data based on + the HDMI Specification 2.0b. + +properties: + compatible: + enum: + - mediatek,mt8188-hdmi-tx + - mediatek,mt8195-hdmi-tx + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: HDMI Peripheral Bus (APB) clock + - description: HDCP and HDMI_TOP clock + - description: HDCP, HDMI_TOP and HDMI Audio reference clock + - description: VPP HDMI Split clock + + clock-names: + items: + - const: bus + - const: hdcp + - const: hdcp24m + - const: hdmi-split + + i2c: + type: object + $ref: /schemas/display/mediatek/mediatek,mt8195-hdmi-ddc.yaml + unevaluatedProperties: false + description: HDMI DDC I2C controller + + phys: + maxItems: 1 + description: PHY providing clocking TMDS and pixel to controller + + phy-names: + items: + - const: hdmi + + power-domains: + maxItems: 1 + + '#sound-dai-cells': + const: 1 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Input port, usually connected to the output port of a DPI + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: + Output port that must be connected either to the input port of + a HDMI connector node containing a ddc-i2c-bus, or to the input + port of an attached bridge chip, such as a SlimPort transmitter. + + required: + - port@0 + - port@1 + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - phys + - phy-names + - ports + +allOf: + - $ref: /schemas/sound/dai-common.yaml# + +additionalProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + hdmi@1c300000 { + compatible = "mediatek,mt8195-hdmi-tx"; + reg = <0 0x1c300000 0 0x1000>; + clocks = <&topckgen CLK_TOP_HDMI_APB>, + <&topckgen CLK_TOP_HDCP>, + <&topckgen CLK_TOP_HDCP_24M>, + <&vppsys1 CLK_VPP1_VPP_SPLIT_HDMI>; + clock-names = "bus", "hdcp", "hdcp24m", "hdmi-split"; + interrupts = ; + phys = <&hdmi_phy>; + phy-names = "hdmi"; + power-domains = <&spm MT8195_POWER_DOMAIN_HDMI_TX>; + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_pins>; + #sound-dai-cells = <1>; + + hdmitx_ddc: i2c { + compatible = "mediatek,mt8195-hdmi-ddc"; + clocks = <&clk26m>; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + hdmi_in: endpoint { + remote-endpoint = <&dpi1_out>; + }; + }; + + port@1 { + reg = <1>; + + hdmi_out: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml index e00b88332f2f..246bbb509bea 100644 --- a/Documentation/devicetree/bindings/display/msm/dp-controller.yaml +++ b/Documentation/devicetree/bindings/display/msm/dp-controller.yaml @@ -31,6 +31,7 @@ properties: - qcom,sm8650-dp - items: - enum: + - qcom,sar2130p-dp - qcom,sm6350-dp - qcom,sm8150-dp - qcom,sm8250-dp diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 2aab33cd0017..82fe95a6d959 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -23,6 +23,8 @@ properties: - qcom,msm8996-dsi-ctrl - qcom,msm8998-dsi-ctrl - qcom,qcm2290-dsi-ctrl + - qcom,sa8775p-dsi-ctrl + - qcom,sar2130p-dsi-ctrl - qcom,sc7180-dsi-ctrl - qcom,sc7280-dsi-ctrl - qcom,sdm660-dsi-ctrl @@ -314,6 +316,8 @@ allOf: contains: enum: - qcom,msm8998-dsi-ctrl + - qcom,sa8775p-dsi-ctrl + - qcom,sar2130p-dsi-ctrl - qcom,sc7180-dsi-ctrl - qcom,sc7280-dsi-ctrl - qcom,sdm845-dsi-ctrl diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml index 321470435e65..3c75ff42999a 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml @@ -17,6 +17,8 @@ properties: enum: - qcom,dsi-phy-7nm - qcom,dsi-phy-7nm-8150 + - qcom,sa8775p-dsi-phy-5nm + - qcom,sar2130p-dsi-phy-5nm - qcom,sc7280-dsi-phy-7nm - qcom,sm6375-dsi-phy-7nm - qcom,sm8350-dsi-phy-5nm diff --git a/Documentation/devicetree/bindings/display/msm/hdmi.yaml b/Documentation/devicetree/bindings/display/msm/hdmi.yaml index d4a2033afea8..dfec6c3480f3 100644 --- a/Documentation/devicetree/bindings/display/msm/hdmi.yaml +++ b/Documentation/devicetree/bindings/display/msm/hdmi.yaml @@ -66,21 +66,6 @@ properties: maxItems: 1 description: hpd pin - qcom,hdmi-tx-mux-en-gpios: - maxItems: 1 - deprecated: true - description: HDMI mux enable pin - - qcom,hdmi-tx-mux-sel-gpios: - maxItems: 1 - deprecated: true - description: HDMI mux select pin - - qcom,hdmi-tx-mux-lpm-gpios: - maxItems: 1 - deprecated: true - description: HDMI mux lpm pin - '#sound-dai-cells': const: 1 @@ -89,12 +74,12 @@ properties: $ref: /schemas/graph.yaml#/properties/ports properties: port@0: - $ref: /schemas/graph.yaml#/$defs/port-base + $ref: /schemas/graph.yaml#/properties/port description: | Input endpoints of the controller. port@1: - $ref: /schemas/graph.yaml#/$defs/port-base + $ref: /schemas/graph.yaml#/properties/port description: | Output endpoints of the controller. diff --git a/Documentation/devicetree/bindings/display/msm/mdp4.yaml b/Documentation/devicetree/bindings/display/msm/mdp4.yaml index 35204a287579..03ee09faa335 100644 --- a/Documentation/devicetree/bindings/display/msm/mdp4.yaml +++ b/Documentation/devicetree/bindings/display/msm/mdp4.yaml @@ -18,9 +18,10 @@ properties: clocks: minItems: 6 - maxItems: 6 + maxItems: 8 clock-names: + minItems: 6 items: - const: core_clk - const: iface_clk @@ -28,6 +29,12 @@ properties: - const: lut_clk - const: hdmi_clk - const: tv_clk + - const: lcdc_clk + - const: pxo + description: XO used to drive the internal LVDS PLL + + '#clock-cells': + const: 0 reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml index 7c6462caa442..db9c43b20e2a 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,mdss.yaml @@ -84,6 +84,18 @@ properties: items: - description: MDSS_CORE reset + interconnects: + minItems: 1 + items: + - description: Interconnect path from mdp0 (or a single mdp) port to the data bus + - description: Interconnect path from CPU to the reg bus + + interconnect-names: + minItems: 1 + items: + - const: mdp0-mem + - const: cpu-cfg + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml index 5fac3e266703..1053b3bc4908 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sa8775p-mdss.yaml @@ -52,12 +52,23 @@ patternProperties: items: - const: qcom,sa8775p-dp + "^dsi@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + contains: + const: qcom,sa8775p-dsi-ctrl + "^phy@[0-9a-f]+$": type: object additionalProperties: true properties: compatible: - const: qcom,sa8775p-edp-phy + contains: + enum: + - qcom,sa8775p-dsi-phy-5nm + - qcom,sa8775p-edp-phy required: - compatible @@ -139,6 +150,20 @@ examples: remote-endpoint = <&mdss0_dp0_in>; }; }; + + port@1 { + reg = <1>; + dpu_intf1_out: endpoint { + remote-endpoint = <&mdss0_dsi0_in>; + }; + }; + + port@2 { + reg = <2>; + dpu_intf2_out: endpoint { + remote-endpoint = <&mdss0_dsi1_in>; + }; + }; }; mdss0_mdp_opp_table: opp-table { @@ -186,6 +211,160 @@ examples: vdda-pll-supply = <&vreg_l4a>; }; + dsi@ae94000 { + compatible = "qcom,sa8775p-dsi-ctrl", "qcom,mdss-dsi-ctrl"; + reg = <0x0ae94000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <4>; + + clocks = <&dispc_byte_clk>, + <&dispcc_intf_clk>, + <&dispcc_pclk>, + <&dispcc_esc_clk>, + <&dispcc_ahb_clk>, + <&gcc_bus_clk>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + assigned-clocks = <&dispcc_byte_clk>, + <&dispcc_pclk>; + assigned-clock-parents = <&mdss0_dsi0_phy 0>, <&mdss0_dsi0_phy 1>; + phys = <&mdss0_dsi0_phy>; + + operating-points-v2 = <&dsi0_opp_table>; + power-domains = <&rpmhpd SA8775P_MMCX>; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + mdss0_dsi0_in: endpoint { + remote-endpoint = <&dpu_intf1_out>; + }; + }; + + port@1 { + reg = <1>; + mdss0_dsi0_out: endpoint { }; + }; + }; + + dsi0_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-358000000 { + opp-hz = /bits/ 64 <358000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + }; + }; + + mdss0_dsi0_phy: phy@ae94400 { + compatible = "qcom,sa8775p-dsi-phy-5nm"; + reg = <0x0ae94400 0x200>, + <0x0ae94600 0x280>, + <0x0ae94900 0x27c>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc_iface_clk>, + <&rpmhcc_ref_clk>; + clock-names = "iface", "ref"; + + vdds-supply = <&vreg_dsi_supply>; + }; + + dsi@ae96000 { + compatible = "qcom,sa8775p-dsi-ctrl", "qcom,mdss-dsi-ctrl"; + reg = <0x0ae96000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <4>; + + clocks = <&dispc_byte_clk>, + <&dispcc_intf_clk>, + <&dispcc_pclk>, + <&dispcc_esc_clk>, + <&dispcc_ahb_clk>, + <&gcc_bus_clk>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + assigned-clocks = <&dispcc_byte_clk>, + <&dispcc_pclk>; + assigned-clock-parents = <&mdss0_dsi1_phy 0>, <&mdss0_dsi1_phy 1>; + phys = <&mdss0_dsi1_phy>; + + operating-points-v2 = <&dsi1_opp_table>; + power-domains = <&rpmhpd SA8775P_MMCX>; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + mdss0_dsi1_in: endpoint { + remote-endpoint = <&dpu_intf2_out>; + }; + }; + + port@1 { + reg = <1>; + mdss0_dsi1_out: endpoint { }; + }; + }; + + dsi1_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-358000000 { + opp-hz = /bits/ 64 <358000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + }; + }; + + mdss0_dsi1_phy: phy@ae96400 { + compatible = "qcom,sa8775p-dsi-phy-5nm"; + reg = <0x0ae96400 0x200>, + <0x0ae96600 0x280>, + <0x0ae96900 0x27c>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc_iface_clk>, + <&rpmhcc_ref_clk>; + clock-names = "iface", "ref"; + + vdds-supply = <&vreg_dsi_supply>; + }; + displayport-controller@af54000 { compatible = "qcom,sa8775p-dp"; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml new file mode 100644 index 000000000000..870144b53cec --- /dev/null +++ b/Documentation/devicetree/bindings/display/msm/qcom,sar2130p-mdss.yaml @@ -0,0 +1,439 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/msm/qcom,sar2130p-mdss.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SAR2130P Display MDSS + +maintainers: + - Dmitry Baryshkov + +description: + SAR2310P MSM Mobile Display Subsystem(MDSS), which encapsulates sub-blocks like + DPU display controller, DSI and DP interfaces etc. + +$ref: /schemas/display/msm/mdss-common.yaml# + +properties: + compatible: + const: qcom,sar2130p-mdss + + clocks: + items: + - description: Display MDSS AHB + - description: Display AHB + - description: Display hf AXI + - description: Display core + + iommus: + maxItems: 1 + + interconnects: + items: + - description: Interconnect path from mdp0 port to the data bus + - description: Interconnect path from CPU to the reg bus + + interconnect-names: + items: + - const: mdp0-mem + - const: cpu-cfg + +patternProperties: + "^display-controller@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + const: qcom,sar2130p-dpu + + "^displayport-controller@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + contains: + const: qcom,sar2130p-dp + + "^dsi@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + contains: + const: qcom,sar2130p-dsi-ctrl + + "^phy@[0-9a-f]+$": + type: object + additionalProperties: true + properties: + compatible: + const: qcom,sar2130p-dsi-phy-5nm + +required: + - compatible + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + display-subsystem@ae00000 { + compatible = "qcom,sar2130p-mdss"; + reg = <0x0ae00000 0x1000>; + reg-names = "mdss"; + + interconnects = <&mmss_noc_master_mdp &mc_virt_slave_ebi1>, + <&gem_noc_master_appss_proc &config_noc_slave_display_cfg>; + interconnect-names = "mdp0-mem", "cpu-cfg"; + + resets = <&dispcc_disp_cc_mdss_core_bcr>; + + power-domains = <&dispcc_mdss_gdsc>; + + clocks = <&dispcc_disp_cc_mdss_ahb_clk>, + <&gcc_gcc_disp_ahb_clk>, + <&gcc_gcc_disp_hf_axi_clk>, + <&dispcc_disp_cc_mdss_mdp_clk>; + clock-names = "iface", "bus", "nrt_bus", "core"; + + interrupts = ; + interrupt-controller; + #interrupt-cells = <1>; + + iommus = <&apps_smmu 0x1c00 0x2>; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + display-controller@ae01000 { + compatible = "qcom,sar2130p-dpu"; + reg = <0x0ae01000 0x8f000>, + <0x0aeb0000 0x2008>; + reg-names = "mdp", "vbif"; + + clocks = <&gcc_gcc_disp_ahb_clk>, + <&gcc_gcc_disp_hf_axi_clk>, + <&dispcc_disp_cc_mdss_ahb_clk>, + <&dispcc_disp_cc_mdss_mdp_lut_clk>, + <&dispcc_disp_cc_mdss_mdp_clk>, + <&dispcc_disp_cc_mdss_vsync_clk>; + clock-names = "bus", + "nrt_bus", + "iface", + "lut", + "core", + "vsync"; + + assigned-clocks = <&dispcc_disp_cc_mdss_vsync_clk>; + assigned-clock-rates = <19200000>; + + operating-points-v2 = <&mdp_opp_table>; + power-domains = <&rpmhpd RPMHPD_MMCX>; + + interrupt-parent = <&mdss>; + interrupts = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + dpu_intf0_out: endpoint { + remote-endpoint = <&mdss_dp0_in>; + }; + }; + + port@1 { + reg = <1>; + + dpu_intf1_out: endpoint { + remote-endpoint = <&mdss_dsi0_in>; + }; + }; + + port@2 { + reg = <2>; + + dpu_intf2_out: endpoint { + remote-endpoint = <&mdss_dsi1_in>; + }; + }; + }; + + mdp_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-200000000 { + opp-hz = /bits/ 64 <200000000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-325000000 { + opp-hz = /bits/ 64 <325000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-375000000 { + opp-hz = /bits/ 64 <375000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + + opp-514000000 { + opp-hz = /bits/ 64 <514000000>; + required-opps = <&rpmhpd_opp_nom>; + }; + }; + }; + + displayport-controller@ae90000 { + compatible = "qcom,sar2130p-dp", + "qcom,sm8350-dp"; + reg = <0xae90000 0x200>, + <0xae90200 0x200>, + <0xae90400 0xc00>, + <0xae91000 0x400>, + <0xae91400 0x400>; + + interrupt-parent = <&mdss>; + interrupts = <12>; + clocks = <&dispcc_disp_cc_mdss_ahb_clk>, + <&dispcc_disp_cc_mdss_dptx0_aux_clk>, + <&dispcc_disp_cc_mdss_dptx0_link_clk>, + <&dispcc_disp_cc_mdss_dptx0_link_intf_clk>, + <&dispcc_disp_cc_mdss_dptx0_pixel0_clk>; + clock-names = "core_iface", + "core_aux", + "ctrl_link", + "ctrl_link_iface", + "stream_pixel"; + + assigned-clocks = <&dispcc_disp_cc_mdss_dptx0_link_clk_src>, + <&dispcc_disp_cc_mdss_dptx0_pixel0_clk_src>; + assigned-clock-parents = <&usb_dp_qmpphy_QMP_USB43DP_DP_LINK_CLK>, + <&usb_dp_qmpphy_QMP_USB43DP_DP_VCO_DIV_CLK>; + + phys = <&usb_dp_qmpphy QMP_USB43DP_DP_PHY>; + phy-names = "dp"; + + #sound-dai-cells = <0>; + + operating-points-v2 = <&dp_opp_table>; + power-domains = <&rpmhpd RPMHPD_MMCX>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + mdss_dp0_in: endpoint { + remote-endpoint = <&dpu_intf0_out>; + }; + }; + + port@1 { + reg = <1>; + mdss_dp0_out: endpoint { + remote-endpoint = <&usb_dp_qmpphy_dp_in>; + }; + }; + }; + + dp_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-162000000 { + opp-hz = /bits/ 64 <162000000>; + required-opps = <&rpmhpd_opp_low_svs_d1>; + }; + + opp-270000000 { + opp-hz = /bits/ 64 <270000000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-540000000 { + opp-hz = /bits/ 64 <540000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + + opp-810000000 { + opp-hz = /bits/ 64 <810000000>; + required-opps = <&rpmhpd_opp_nom>; + }; + }; + }; + + dsi@ae94000 { + compatible = "qcom,sar2130p-dsi-ctrl", + "qcom,mdss-dsi-ctrl"; + reg = <0x0ae94000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <4>; + + clocks = <&dispcc_disp_cc_mdss_byte0_clk>, + <&dispcc_disp_cc_mdss_byte0_intf_clk>, + <&dispcc_disp_cc_mdss_pclk0_clk>, + <&dispcc_disp_cc_mdss_esc0_clk>, + <&dispcc_disp_cc_mdss_ahb_clk>, + <&gcc_gcc_disp_hf_axi_clk>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + + assigned-clocks = <&dispcc_disp_cc_mdss_byte0_clk_src>, + <&dispcc_disp_cc_mdss_pclk0_clk_src>; + assigned-clock-parents = <&mdss_dsi0_phy 0>, <&mdss_dsi0_phy 1>; + + operating-points-v2 = <&dsi_opp_table>; + power-domains = <&rpmhpd RPMHPD_MMCX>; + + phys = <&mdss_dsi0_phy>; + phy-names = "dsi"; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + mdss_dsi0_in: endpoint { + remote-endpoint = <&dpu_intf1_out>; + }; + }; + + port@1 { + reg = <1>; + + mdss_dsi0_out: endpoint { + }; + }; + }; + + dsi_opp_table: opp-table { + compatible = "operating-points-v2"; + + opp-187500000 { + opp-hz = /bits/ 64 <187500000>; + required-opps = <&rpmhpd_opp_low_svs>; + }; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + required-opps = <&rpmhpd_opp_svs>; + }; + + opp-358000000 { + opp-hz = /bits/ 64 <358000000>; + required-opps = <&rpmhpd_opp_svs_l1>; + }; + }; + }; + + mdss_dsi0_phy: phy@ae94400 { + compatible = "qcom,sar2130p-dsi-phy-5nm"; + reg = <0x0ae95000 0x200>, + <0x0ae95200 0x280>, + <0x0ae95500 0x400>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc_disp_cc_mdss_ahb_clk>, + <&rpmhcc_rpmh_cxo_clk>; + clock-names = "iface", "ref"; + }; + + dsi@ae96000 { + compatible = "qcom,sar2130p-dsi-ctrl", + "qcom,mdss-dsi-ctrl"; + reg = <0x0ae96000 0x400>; + reg-names = "dsi_ctrl"; + + interrupt-parent = <&mdss>; + interrupts = <5>; + + clocks = <&dispcc_disp_cc_mdss_byte1_clk>, + <&dispcc_disp_cc_mdss_byte1_intf_clk>, + <&dispcc_disp_cc_mdss_pclk1_clk>, + <&dispcc_disp_cc_mdss_esc1_clk>, + <&dispcc_disp_cc_mdss_ahb_clk>, + <&gcc_gcc_disp_hf_axi_clk>; + clock-names = "byte", + "byte_intf", + "pixel", + "core", + "iface", + "bus"; + + assigned-clocks = <&dispcc_disp_cc_mdss_byte1_clk_src>, + <&dispcc_disp_cc_mdss_pclk1_clk_src>; + assigned-clock-parents = <&mdss_dsi1_phy 0>, <&mdss_dsi1_phy 1>; + + operating-points-v2 = <&dsi_opp_table>; + power-domains = <&rpmhpd RPMHPD_MMCX>; + + phys = <&mdss_dsi1_phy>; + phy-names = "dsi"; + + #address-cells = <1>; + #size-cells = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + mdss_dsi1_in: endpoint { + remote-endpoint = <&dpu_intf2_out>; + }; + }; + + port@1 { + reg = <1>; + + mdss_dsi1_out: endpoint { + }; + }; + }; + }; + + mdss_dsi1_phy: phy@ae97000 { + compatible = "qcom,sar2130p-dsi-phy-5nm"; + reg = <0x0ae97000 0x200>, + <0x0ae97200 0x280>, + <0x0ae97500 0x400>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + clocks = <&dispcc_disp_cc_mdss_ahb_clk>, + <&rpmhcc_rpmh_cxo_clk>; + clock-names = "iface", "ref"; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml index 6902795b4e2c..df9ec15ad6c3 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sc7280-dpu.yaml @@ -17,6 +17,7 @@ $ref: /schemas/display/msm/dpu-common.yaml# properties: compatible: enum: + - qcom,sar2130p-dpu - qcom,sc7280-dpu - qcom,sc8280xp-dpu - qcom,sm8350-dpu diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml index 163fc83c1e80..68176de854b3 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8350-mdss.yaml @@ -38,12 +38,16 @@ properties: maxItems: 1 interconnects: - maxItems: 2 + items: + - description: Interconnect path from the MDP0 port to the data bus + - description: Interconnect path from the MDP1 port to the data bus + - description: Interconnect path from the CPU to the reg bus interconnect-names: items: - const: mdp0-mem - const: mdp1-mem + - const: cpu-cfg patternProperties: "^display-controller@[0-9a-f]+$": @@ -88,6 +92,7 @@ examples: #include #include #include + #include #include #include @@ -97,8 +102,10 @@ examples: reg-names = "mdss"; interconnects = <&mmss_noc MASTER_MDP0 0 &mc_virt SLAVE_EBI1 0>, - <&mmss_noc MASTER_MDP1 0 &mc_virt SLAVE_EBI1 0>; - interconnect-names = "mdp0-mem", "mdp1-mem"; + <&mmss_noc MASTER_MDP1 0 &mc_virt SLAVE_EBI1 0>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", "mdp1-mem", "cpu-cfg"; power-domains = <&dispcc MDSS_GDSC>; resets = <&dispcc DISP_CC_MDSS_CORE_BCR>; diff --git a/Documentation/devicetree/bindings/display/panel/boe,td4320.yaml b/Documentation/devicetree/bindings/display/panel/boe,td4320.yaml new file mode 100644 index 000000000000..c6bff0ece360 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/boe,td4320.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/boe,td4320.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: BOE TD4320 MIPI-DSI panels + +maintainers: + - Barnabas Czeman + +description: + BOE TD4320 6.3" 1080x2340 panel found in Xiaomi Redmi Note 7 smartphone. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + items: + - const: boe,td4320 + + reg: + maxItems: 1 + + iovcc-supply: + description: I/O voltage rail + + vsn-supply: + description: Negative source voltage rail + + vsp-supply: + description: Positive source voltage rail + +required: + - compatible + - reg + - reset-gpios + - port + +unevaluatedProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "boe,td4320"; + reg = <0>; + backlight = <&backlight>; + reset-gpios = <&tlmm 45 GPIO_ACTIVE_LOW>; + + port { + panel_in: endpoint { + remote-endpoint = <&dsi_out>; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/himax,hx8279.yaml b/Documentation/devicetree/bindings/display/panel/himax,hx8279.yaml new file mode 100644 index 000000000000..f619aea82bdf --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/himax,hx8279.yaml @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/himax,hx8279.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Himax HX8279/HX8279-D based MIPI-DSI panels + +maintainers: + - AngeloGioacchino Del Regno + +description: + The Himax HX8279 is a 1803 channel outputs source driver with MIPI + TCON, which generates the horizontal and vertical control timing to + the source and gate drivers. + This DriverIC is most suitable for 1200x1920, 1080x1920, 1200x1600, + and 600x1024 panels and outputs full RGB888 over two or four lanes, + single or dual, MIPI-DSI video interface. + +allOf: + - $ref: panel-common-dual.yaml# + +properties: + compatible: + items: + - enum: + - aoly,sl101pm1794fog-v15 + - startek,kd070fhfid078 + - const: himax,hx8279 + + reg: + maxItems: 1 + + iovcc-supply: + description: I/O voltage supply + + vdd-supply: + description: Panel power supply + +required: + - compatible + - reg + - backlight + - reset-gpios + - iovcc-supply + - vdd-supply + +unevaluatedProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "startek,kd070fhfid078", "himax,hx8279"; + reg = <0>; + backlight = <&backlight>; + enable-gpios = <&pio 25 GPIO_ACTIVE_HIGH>; + reset-gpios = <&pio 45 GPIO_ACTIVE_HIGH>; + iovcc-supply = <&vreg_lcm_vio>; + vdd-supply = <&vreg_lcm_vdd>; + + port { + panel_in: endpoint { + remote-endpoint = <&dsi_out>; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml index e2a2dd4ef5fa..5fcea62fd58f 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk050h3146w.yaml @@ -23,6 +23,7 @@ properties: maxItems: 1 backlight: true + port: true reset-gpios: true iovcc-supply: description: regulator that supplies the iovcc voltage diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml index af9e0ea0e72f..b0e2c82232d3 100644 --- a/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk500hd1829.yaml @@ -22,6 +22,7 @@ properties: maxItems: 1 backlight: true + port: true reset-gpios: true iovcc-supply: description: regulator that supplies the iovcc voltage diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml new file mode 100644 index 000000000000..1b38c1d0af68 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/novatek,nt37801.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Novatek NT37801 AMOLED DSI Panel + +maintainers: + - Krzysztof Kozlowski + +description: + Naming is inconclusive and different sources claim this is either Novatek + NT37801 or NT37810 AMOLED DSI Panel. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: novatek,nt37801 + + reg: + maxItems: 1 + description: DSI virtual channel + + vci-supply: true + vdd-supply: true + vddio-supply: true + port: true + reset-gpios: true + +required: + - compatible + - reg + - vci-supply + - vdd-supply + - vddio-supply + - port + - reset-gpios + +additionalProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "novatek,nt37801"; + reg = <0>; + + vci-supply = <&vreg_l13b_3p0>; + vdd-supply = <&vreg_l11b_1p2>; + vddio-supply = <&vreg_l12b_1p8>; + + reset-gpios = <&tlmm 98 GPIO_ACTIVE_LOW>; + + port { + endpoint { + remote-endpoint = <&dsi0_out>; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index b0de4fd6f3d4..5542c9229d54 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -226,6 +226,8 @@ properties: - netron-dy,e231732 # Newhaven Display International 480 x 272 TFT LCD panel - newhaven,nhd-4.3-480272ef-atxl + # NLT Technologies, Ltd. 15.6" WXGA (1366×768) LVDS TFT LCD panel + - nlt,nl13676bc25-03f # New Vision Display 7.0" 800 RGB x 480 TFT LCD panel - nvd,9128 # OKAYA Electric America, Inc. RS800480T-7X0GP 7" WVGA LCD panel @@ -246,6 +248,8 @@ properties: - osddisplays,osd070t1718-19ts # One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel - osddisplays,osd101t2045-53ts + # POWERTIP PH128800T004-ZZA01 10.1" WXGA TFT LCD panel + - powertip,ph128800t004-zza01 # POWERTIP PH128800T006-ZHC01 10.1" WXGA TFT LCD panel - powertip,ph128800t006-zhc01 # POWERTIP PH800480T013-IDF2 7.0" WVGA TFT LCD panel @@ -284,6 +288,8 @@ properties: - startek,kd070wvfpa # Team Source Display Technology TST043015CMHX 4.3" WQVGA TFT LCD panel - team-source-display,tst043015cmhx + # Tianma Micro-electronics P0700WXF1MBAA 7.0" WXGA (1280x800) LVDS TFT LCD panel + - tianma,p0700wxf1mbaa # Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel - tianma,tm070jdhg30 # Tianma Micro-electronics TM070JDHG34-00 7.0" WXGA (1280x800) LVDS TFT LCD panel diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 684c2896d238..31f0c0f038e4 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -19,6 +19,8 @@ properties: - const: samsung,atna33xc20 - items: - enum: + # Samsung 14" WQXGA+ (2880×1800 pixels) eDP AMOLED panel + - samsung,atna40yk20 # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel - samsung,atna45af01 # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel diff --git a/Documentation/devicetree/bindings/display/panel/truly,nt35597-2K-display.yaml b/Documentation/devicetree/bindings/display/panel/truly,nt35597-2K-display.yaml new file mode 100644 index 000000000000..36be09c900f2 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/truly,nt35597-2K-display.yaml @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/truly,nt35597-2K-display.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Truly NT35597 DSI 2K display + +maintainers: + - Neil Armstrong + +description: | + Truly NT35597 DSI 2K display is used on the Qualcomm SDM845 MTP board. + +allOf: + - $ref: panel-common-dual.yaml# + +properties: + compatible: + const: truly,nt35597-2K-display + + reg: + maxItems: 1 + + vdda-supply: + description: regulator that provides the supply voltage Power IC supply + + vdispp-supply: + description: regulator that provides the supply voltage for positive LCD bias + + vdispn-supply: + description: regulator that provides the supply voltage for negative LCD bias + + reset-gpios: true + + mode-gpios: + description: + Gpio for choosing the mode of the display for single DSI or Dual DSI. + This should be low for dual DSI and high for single DSI mode. + + ports: + required: + - port@0 + - port@1 + +required: + - compatible + - reg + - vdda-supply + - reset-gpios + - mode-gpios + - ports + +additionalProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "truly,nt35597-2K-display"; + reg = <0>; + + vdda-supply = <&pm8998_l14>; + vdispp-supply = <&lab_regulator>; + vdispn-supply = <&ibb_regulator>; + + reset-gpios = <&tlmm 6 GPIO_ACTIVE_LOW>; + mode-gpios = <&tlmm 52 GPIO_ACTIVE_HIGH>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + panel0_in: endpoint { + remote-endpoint = <&dsi0_out>; + }; + }; + + port@1 { + reg = <1>; + + panel1_in: endpoint { + remote-endpoint = <&dsi1_out>; + }; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/display/panel/visionox,g2647fb105.yaml b/Documentation/devicetree/bindings/display/panel/visionox,g2647fb105.yaml new file mode 100644 index 000000000000..49dcd9b8f670 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/visionox,g2647fb105.yaml @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/visionox,g2647fb105.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Visionox G2647FB105 6.47" 1080x2340 MIPI-DSI Panel + +maintainers: + - Alexander Baransky + +description: + The Visionox G2647FB105 is a 6.47 inch 1080x2340 MIPI-DSI CMD mode OLED panel. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: visionox,g2647fb105 + + reg: + maxItems: 1 + + vdd3p3-supply: + description: 3.3V source voltage rail + + vddio-supply: + description: I/O source voltage rail + + vsn-supply: + description: Negative source voltage rail + + vsp-supply: + description: Positive source voltage rail + + reset-gpios: true + port: true + +required: + - compatible + - reg + - vdd3p3-supply + - vddio-supply + - vsn-supply + - vsp-supply + - reset-gpios + - port + +additionalProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "visionox,g2647fb105"; + reg = <0>; + + vdd3p3-supply = <&vreg_l7c_3p0>; + vddio-supply = <&vreg_l13a_1p8>; + vsn-supply = <&vreg_ibb>; + vsp-supply = <&vreg_lab>; + + reset-gpios = <&pm6150l_gpios 9 GPIO_ACTIVE_LOW>; + + port { + panel_in: endpoint { + remote-endpoint = <&mdss_dsi0_out>; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml index 60dedf9b2be7..d99b23b88cc5 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,analogix-dp.yaml @@ -15,6 +15,7 @@ properties: enum: - rockchip,rk3288-dp - rockchip,rk3399-edp + - rockchip,rk3588-edp clocks: minItems: 2 @@ -31,16 +32,23 @@ properties: maxItems: 1 resets: - maxItems: 1 + minItems: 1 + maxItems: 2 reset-names: - const: dp + minItems: 1 + items: + - const: dp + - const: apb rockchip,grf: $ref: /schemas/types.yaml#/definitions/phandle description: This SoC makes use of GRF regs. + aux-bus: + $ref: /schemas/display/dp-aux-bus.yaml# + required: - compatible - clocks @@ -52,6 +60,19 @@ required: allOf: - $ref: /schemas/display/bridge/analogix,dp.yaml# + - if: + properties: + compatible: + contains: + enum: + - rockchip,rk3588-edp + then: + properties: + resets: + minItems: 2 + reset-names: + minItems: 2 + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml index 5b87b0f1963e..290376bec079 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,inno-hdmi.yaml @@ -23,13 +23,11 @@ properties: maxItems: 1 clocks: - minItems: 1 items: - description: The HDMI controller main clock - description: The HDMI PHY reference clock clock-names: - minItems: 1 items: - const: pclk - const: ref @@ -58,6 +56,12 @@ properties: - port@0 - port@1 + rockchip,grf: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to GRF used for control the polarity of hsync/vsync of rk3036 + HDMI. + required: - compatible - reg @@ -77,6 +81,8 @@ allOf: const: rockchip,rk3036-inno-hdmi then: + required: + - rockchip,grf properties: power-domains: false @@ -87,11 +93,6 @@ allOf: const: rockchip,rk3128-inno-hdmi then: - properties: - clocks: - minItems: 2 - clock-names: - minItems: 2 required: - power-domains @@ -106,10 +107,11 @@ examples: compatible = "rockchip,rk3036-inno-hdmi"; reg = <0x20034000 0x4000>; interrupts = ; - clocks = <&cru PCLK_HDMI>; - clock-names = "pclk"; + clocks = <&cru PCLK_HDMI>, <&cru SCLK_LCDC>; + clock-names = "pclk", "ref"; pinctrl-names = "default"; pinctrl-0 = <&hdmi_ctl>; + rockchip,grf = <&grf>; #sound-dai-cells = <0>; ports { diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml index b339b7e708c6..8b5f58103dda 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml @@ -73,12 +73,6 @@ properties: port: $ref: /schemas/graph.yaml#/properties/port - assigned-clocks: - maxItems: 2 - - assigned-clock-rates: - maxItems: 2 - iommus: maxItems: 1 diff --git a/Documentation/devicetree/bindings/display/sitronix,st7571.yaml b/Documentation/devicetree/bindings/display/sitronix,st7571.yaml new file mode 100644 index 000000000000..4fea782fccd7 --- /dev/null +++ b/Documentation/devicetree/bindings/display/sitronix,st7571.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/sitronix,st7571.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sitronix ST7571 Display Controller + +maintainers: + - Marcus Folkesson + +description: + Sitronix ST7571 is a driver and controller for 4-level gray + scale and monochrome dot matrix LCD panels. + +allOf: + - $ref: panel/panel-common.yaml# + +properties: + compatible: + const: sitronix,st7571 + + reg: + maxItems: 1 + + sitronix,grayscale: + type: boolean + description: + Display supports 4-level grayscale. + + reset-gpios: true + width-mm: true + height-mm: true + panel-timing: true + +required: + - compatible + - reg + - reset-gpios + - width-mm + - height-mm + - panel-timing + +additionalProperties: false + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + display@3f { + compatible = "sitronix,st7571"; + reg = <0x3f>; + reset-gpios = <&gpio0 3 GPIO_ACTIVE_LOW>; + width-mm = <37>; + height-mm = <27>; + + panel-timing { + hactive = <128>; + vactive = <96>; + hback-porch = <0>; + vback-porch = <0>; + clock-frequency = <0>; + hfront-porch = <0>; + hsync-len = <0>; + vfront-porch = <0>; + vsync-len = <0>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/truly,nt35597.txt b/Documentation/devicetree/bindings/display/truly,nt35597.txt deleted file mode 100644 index f39c77ee36ea..000000000000 --- a/Documentation/devicetree/bindings/display/truly,nt35597.txt +++ /dev/null @@ -1,59 +0,0 @@ -Truly model NT35597 DSI display driver - -The Truly NT35597 is a generic display driver, currently only configured -for use in the 2K display on the Qualcomm SDM845 MTP board. - -Required properties: -- compatible: should be "truly,nt35597-2K-display" -- vdda-supply: phandle of the regulator that provides the supply voltage - Power IC supply -- vdispp-supply: phandle of the regulator that provides the supply voltage - for positive LCD bias -- vdispn-supply: phandle of the regulator that provides the supply voltage - for negative LCD bias -- reset-gpios: phandle of gpio for reset line - This should be 8mA, gpio can be configured using mux, pinctrl, pinctrl-names - (active low) -- mode-gpios: phandle of the gpio for choosing the mode of the display - for single DSI or Dual DSI - This should be low for dual DSI and high for single DSI mode -- ports: This device has two video ports driven by two DSIs. Their connections - are modeled using the OF graph bindings specified in - Documentation/devicetree/bindings/graph.txt. - - port@0: DSI input port driven by master DSI - - port@1: DSI input port driven by secondary DSI - -Example: - - dsi@ae94000 { - panel@0 { - compatible = "truly,nt35597-2K-display"; - reg = <0>; - vdda-supply = <&pm8998_l14>; - vdispp-supply = <&lab_regulator>; - vdispn-supply = <&ibb_regulator>; - pinctrl-names = "default", "suspend"; - pinctrl-0 = <&dpu_dsi_active>; - pinctrl-1 = <&dpu_dsi_suspend>; - - reset-gpios = <&tlmm 6 GPIO_ACTIVE_LOW>; - mode-gpios = <&tlmm 52 GPIO_ACTIVE_HIGH>; - ports { - #address-cells = <1>; - #size-cells = <0>; - port@0 { - reg = <0>; - panel0_in: endpoint { - remote-endpoint = <&dsi0_out>; - }; - }; - - port@1 { - reg = <1>; - panel1_in: endpoint { - remote-endpoint = <&dsi1_out>; - }; - }; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml index dc078ceeca9a..43c6d2d72456 100644 --- a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml +++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Broadcom V3D GPU maintainers: - - Eric Anholt + - Maíra Canal - Nicolas Saenz Julienne properties: @@ -22,20 +22,12 @@ properties: - brcm,7278-v3d reg: - items: - - description: hub register (required) - - description: core0 register (required) - - description: GCA cache controller register (if GCA controller present) - - description: bridge register (if no external reset controller) minItems: 2 + maxItems: 4 reg-names: - items: - - const: hub - - const: core0 - - enum: [ bridge, gca ] - - enum: [ bridge, gca ] minItems: 2 + maxItems: 4 interrupts: items: @@ -58,6 +50,76 @@ required: - reg-names - interrupts +allOf: + - if: + properties: + compatible: + contains: + const: brcm,2711-v3d + then: + properties: + reg: + items: + - description: hub register + - description: core0 register + reg-names: + items: + - const: hub + - const: core0 + - if: + properties: + compatible: + contains: + const: brcm,2712-v3d + then: + properties: + reg: + items: + - description: hub register + - description: core0 register + - description: SMS state manager register + reg-names: + items: + - const: hub + - const: core0 + - const: sms + - if: + properties: + compatible: + contains: + const: brcm,7268-v3d + then: + properties: + reg: + items: + - description: hub register + - description: core0 register + - description: GCA cache controller register + - description: bridge register + reg-names: + items: + - const: hub + - const: core0 + - const: gca + - const: bridge + - if: + properties: + compatible: + contains: + const: brcm,7278-v3d + then: + properties: + reg: + items: + - description: hub register + - description: core0 register + - description: bridge register + reg-names: + items: + - const: hub + - const: core0 + - const: bridge + additionalProperties: false examples: @@ -66,9 +128,9 @@ examples: compatible = "brcm,7268-v3d"; reg = <0xf1200000 0x4000>, <0xf1208000 0x4000>, - <0xf1204000 0x100>, - <0xf1204100 0x100>; - reg-names = "hub", "core0", "bridge", "gca"; + <0xf1204100 0x100>, + <0xf1204000 0x100>; + reg-names = "hub", "core0", "gca", "bridge"; interrupts = <0 78 4>, <0 77 4>; }; diff --git a/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml index 256e252f8087..4450e2e73b3c 100644 --- a/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml +++ b/Documentation/devicetree/bindings/gpu/img,powervr-rogue.yaml @@ -12,10 +12,28 @@ maintainers: properties: compatible: - items: - - enum: - - ti,am62-gpu - - const: img,img-axe # IMG AXE GPU model/revision is fully discoverable + oneOf: + - items: + - enum: + - ti,am62-gpu + - const: img,img-axe-1-16m + # This deprecated element must be kept around to allow old kernels to + # work with newer dts. + - const: img,img-axe + - const: img,img-rogue + - items: + - enum: + - ti,j721s2-gpu + - const: img,img-bxs-4-64 + - const: img,img-rogue + + # This legacy combination of compatible strings was introduced early on + # before the more specific GPU identifiers were used. + - items: + - enum: + - ti,am62-gpu + - const: img,img-axe + deprecated: true reg: maxItems: 1 @@ -35,6 +53,18 @@ properties: maxItems: 1 power-domains: + minItems: 1 + maxItems: 2 + + power-domain-names: + items: + - const: a + - const: b + minItems: 1 + + dma-coherent: true + + resets: maxItems: 1 required: @@ -47,11 +77,49 @@ required: additionalProperties: false allOf: + # Constraints added alongside the new compatible strings that would otherwise + # create an ABI break. - if: properties: compatible: contains: - const: ti,am62-gpu + const: img,img-rogue + then: + required: + - power-domains + - power-domain-names + + - if: + properties: + compatible: + contains: + const: img,img-axe-1-16m + then: + properties: + power-domains: + maxItems: 1 + power-domain-names: + maxItems: 1 + + - if: + properties: + compatible: + contains: + const: img,img-bxs-4-64 + then: + properties: + power-domains: + minItems: 2 + power-domain-names: + minItems: 2 + + - if: + properties: + compatible: + contains: + enum: + - ti,am62-gpu + - ti,j721s2-gpu then: properties: clocks: @@ -64,10 +132,12 @@ examples: #include gpu@fd00000 { - compatible = "ti,am62-gpu", "img,img-axe"; + compatible = "ti,am62-gpu", "img,img-axe-1-16m", "img,img-axe", + "img,img-rogue"; reg = <0x0fd00000 0x20000>; clocks = <&k3_clks 187 0>; clock-names = "core"; interrupts = ; power-domains = <&k3_pds 187 TI_SCI_PD_EXCLUSIVE>; + power-domain-names = "a"; }; diff --git a/Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml b/Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml new file mode 100644 index 000000000000..a27ba7b663d4 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/opp/opp-v2-qcom-adreno.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Adreno compatible OPP supply + +description: + Adreno GPUs present in Qualcomm's Snapdragon chipsets uses an OPP specific + ACD related information tailored for the specific chipset. This binding + provides the information needed to describe such a hardware value. + +maintainers: + - Rob Clark + +allOf: + - $ref: opp-v2-base.yaml# + +properties: + compatible: + contains: + const: operating-points-v2-adreno + +patternProperties: + '^opp-[0-9]+$': + type: object + additionalProperties: false + + properties: + opp-hz: true + + opp-level: true + + opp-peak-kBps: true + + opp-supported-hw: true + + qcom,opp-acd-level: + description: | + A positive value representing the ACD (Adaptive Clock Distribution, + a fancy name for clk throttling during voltage droop) level associated + with this OPP node. This value is shared to a co-processor inside GPU + (called Graphics Management Unit a.k.a GMU) during wake up. It may not + be present for some OPPs and GMU will disable ACD while transitioning + to that OPP. This value encodes a voltage threshold, delay cycles & + calibration margins which are identified by characterization of the + SoC. So, it doesn't have any unit. This data is passed to GMU firmware + via 'HFI_H2F_MSG_ACD' packet. + $ref: /schemas/types.yaml#/definitions/uint32 + + required: + - opp-hz + - opp-level + +required: + - compatible + +additionalProperties: false + +examples: + - | + #include + + gpu_opp_table: opp-table { + compatible = "operating-points-v2-adreno", "operating-points-v2"; + + opp-687000000 { + opp-hz = /bits/ 64 <687000000>; + opp-level = ; + opp-peak-kBps = <8171875>; + qcom,opp-acd-level = <0x882e5ffd>; + }; + + opp-550000000 { + opp-hz = /bits/ 64 <550000000>; + opp-level = ; + opp-peak-kBps = <6074219>; + qcom,opp-acd-level = <0xc0285ffd>; + }; + + opp-390000000 { + opp-hz = /bits/ 64 <390000000>; + opp-level = ; + opp-peak-kBps = <3000000>; + qcom,opp-acd-level = <0xc0285ffd>; + }; + + opp-300000000 { + opp-hz = /bits/ 64 <300000000>; + opp-level = ; + opp-peak-kBps = <2136719>; + /* Intentionally left out qcom,opp-acd-level property here */ + }; + + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 75ffaebc749d..070b16d413d9 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -129,6 +129,8 @@ patternProperties: description: Andes Technology Corporation "^anvo,.*": description: Anvo-Systems Dresden GmbH + "^aoly,.*": + description: Shenzhen Aoly Technology Co., Ltd. "^aosong,.*": description: Guangzhou Aosong Electronic Co., Ltd. "^apm,.*": diff --git a/Documentation/gpu/amdgpu/amd-hardware-list-info.rst b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst new file mode 100644 index 000000000000..1786544fe7c1 --- /dev/null +++ b/Documentation/gpu/amdgpu/amd-hardware-list-info.rst @@ -0,0 +1,23 @@ +================================================= + AMD Hardware Components Information per Product +================================================= + +On this page, you can find the AMD product name and which component version is +part of it. + +Accelerated Processing Units (APU) Info +--------------------------------------- + +.. csv-table:: + :header-rows: 1 + :widths: 3, 2, 2, 1, 1, 1, 1 + :file: ./apu-asic-info-table.csv + +Discrete GPU Info +----------------- + +.. csv-table:: + :header-rows: 1 + :widths: 3, 2, 2, 1, 1, 1 + :file: ./dgpu-asic-info-table.csv + diff --git a/Documentation/gpu/amdgpu/amdgpu-glossary.rst b/Documentation/gpu/amdgpu/amdgpu-glossary.rst index 1e9283e076ba..30812d9d53c6 100644 --- a/Documentation/gpu/amdgpu/amdgpu-glossary.rst +++ b/Documentation/gpu/amdgpu/amdgpu-glossary.rst @@ -12,18 +12,39 @@ we have a dedicated glossary for Display Core at The number of CUs that are active on the system. The number of active CUs may be less than SE * SH * CU depending on the board configuration. + BACO + Bus Alive, Chip Off + + BOCO + Bus Off, Chip Off + CE Constant Engine + CIK + Sea Islands + + CB + Color Buffer + CP Command Processor CPLIB Content Protection Library + CS + Command Submission + + CSB + Clear State Indirect Buffer + CU Compute Unit + DB + Depth Buffer + DFS Digital Frequency Synthesizer @@ -33,6 +54,9 @@ we have a dedicated glossary for Display Core at EOP End Of Pipe/Pipeline + FLR + Function Level Reset + GART Graphics Address Remapping Table. This is the name we use for the GPUVM page table used by the GPU kernel driver. It remaps system resources @@ -45,6 +69,12 @@ we have a dedicated glossary for Display Core at GC Graphics and Compute + GDS + Global Data Share + + GE + Geometry Engine + GMC Graphic Memory Controller @@ -80,6 +110,9 @@ we have a dedicated glossary for Display Core at KCQ Kernel Compute Queue + KFD + Kernel Fusion Driver + KGQ Kernel Graphics Queue @@ -89,6 +122,9 @@ we have a dedicated glossary for Display Core at MC Memory Controller + MCBP + Mid Command Buffer Preemption + ME MicroEngine (Graphics) @@ -104,6 +140,9 @@ we have a dedicated glossary for Display Core at MQD Memory Queue Descriptor + PA + Primitive Assembler / Physical Address + PFP Pre-Fetch Parser (Graphics) @@ -113,24 +152,39 @@ we have a dedicated glossary for Display Core at PSP Platform Security Processor + RB + Render Backends. Some people called it ROPs. + RLC RunList Controller. This name is a remnant of past ages and doesn't have much meaning today. It's a group of general-purpose helper engines for the GFX block. It's involved in GFX power management and SR-IOV, among other things. + SC + Scan Converter + SDMA System DMA SE Shader Engine + SGPR + Scalar General-Purpose Registers + SH SHader array + SI + Southern Islands + SMU/SMC System Management Unit / System Management Controller + SPI (AMDGPU) + Shader Processor Input + SRLC Save/Restore List Control @@ -143,12 +197,21 @@ we have a dedicated glossary for Display Core at SS Spread Spectrum + SX + Shader Export + TA Trusted Application + TC + Texture Cache + TOC Table of Contents + UMSCH + User Mode Scheduler + UVD Unified Video Decoder @@ -158,5 +221,17 @@ we have a dedicated glossary for Display Core at VCN Video Codec Next + VGPR + Vector General-Purpose Registers + + VMID + Virtual Memory ID + VPE Video Processing Engine + + XCC + Accelerator Core Complex + + XCP + Accelerator Core Partition diff --git a/Documentation/gpu/amdgpu/apu-asic-info-table.csv b/Documentation/gpu/amdgpu/apu-asic-info-table.csv index 5dd4b8762d19..1d50b539677f 100644 --- a/Documentation/gpu/amdgpu/apu-asic-info-table.csv +++ b/Documentation/gpu/amdgpu/apu-asic-info-table.csv @@ -13,3 +13,5 @@ Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8 Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11 Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0 +Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4 +Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1 diff --git a/Documentation/gpu/amdgpu/debugfs.rst b/Documentation/gpu/amdgpu/debugfs.rst new file mode 100644 index 000000000000..5150d0a95658 --- /dev/null +++ b/Documentation/gpu/amdgpu/debugfs.rst @@ -0,0 +1,210 @@ +============== +AMDGPU DebugFS +============== + +The amdgpu driver provides a number of debugfs files to aid in debugging +issues in the driver. These are usually found in +/sys/kernel/debug/dri/. + +DebugFS Files +============= + +amdgpu_benchmark +---------------- + +Run benchmarks using the DMA engine the driver uses for GPU memory paging. +Write a number to the file to run the test. The results are written to the +kernel log. VRAM is on device memory (dGPUs) or carve out (APUs) and GTT +(Graphics Translation Tables) is system memory that is accessible by the GPU. +The following tests are available: + +- 1: simple test, VRAM to GTT and GTT to VRAM +- 2: simple test, VRAM to VRAM +- 3: GTT to VRAM, buffer size sweep, powers of 2 +- 4: VRAM to GTT, buffer size sweep, powers of 2 +- 5: VRAM to VRAM, buffer size sweep, powers of 2 +- 6: GTT to VRAM, buffer size sweep, common display sizes +- 7: VRAM to GTT, buffer size sweep, common display sizes +- 8: VRAM to VRAM, buffer size sweep, common display sizes + +amdgpu_test_ib +-------------- + +Read this file to run simple IB (Indirect Buffer) tests on all kernel managed +rings. IBs are command buffers usually generated by userspace applications +which are submitted to the kernel for execution on an particular GPU engine. +This just runs the simple IB tests included in the kernel. These tests +are engine specific and verify that IB submission works. + +amdgpu_discovery +---------------- + +Provides raw access to the IP discovery binary provided by the GPU. Read this +file to access the raw binary. This is useful for verifying the contents of +the IP discovery table. It is chip specific. + +amdgpu_vbios +------------ + +Provides raw access to the ROM binary image from the GPU. Read this file to +access the raw binary. This is useful for verifying the contents of the +video BIOS ROM. It is board specific. + +amdgpu_evict_gtt +---------------- + +Evict all buffers from the GTT memory pool. Read this file to evict all +buffers from this pool. + +amdgpu_evict_vram +----------------- + +Evict all buffers from the VRAM memory pool. Read this file to evict all +buffers from this pool. + +amdgpu_gpu_recover +------------------ + +Trigger a GPU reset. Read this file to trigger reset the entire GPU. +All work currently running on the GPU will be lost. + +amdgpu_ring_ +------------------ + +Provides read access to the kernel managed ring buffers for each ring . +These are useful for debugging problems on a particular ring. The ring buffer +is how the CPU sends commands to the GPU. The CPU writes commands into the +buffer and then asks the GPU engine to process it. This is the raw binary +contents of the ring buffer. Use a tool like UMR to decode the rings into human +readable form. + +amdgpu_mqd_ +----------------- + +Provides read access to the kernel managed MQD (Memory Queue Descriptor) for +ring managed by the kernel driver. MQDs define the features of the ring +and are used to store the ring's state when it is not connected to hardware. +The driver writes the requested ring features and metadata (GPU addresses of +the ring itself and associated buffers) to the MQD and the firmware uses the MQD +to populate the hardware when the ring is mapped to a hardware slot. Only +available on engines which use MQDs. This provides access to the raw MQD +binary. + +amdgpu_error_ +------------------- + +Provides an interface to set an error code on the dma fences associated with +ring . The error code specified is propogated to all fences associated +with the ring. Use this to inject a fence error into a ring. + +amdgpu_pm_info +-------------- + +Provides human readable information about the power management features +and state of the GPU. This includes current GFX clock, Memory clock, +voltages, average SoC power, temperature, GFX load, Memory load, SMU +feature mask, VCN power state, clock and power gating features. + +amdgpu_firmware_info +-------------------- + +Lists the firmware versions for all firmwares used by the GPU. Only +entries with a non-0 version are valid. If the version is 0, the firmware +is not valid for the GPU. + +amdgpu_fence_info +----------------- + +Shows the last signalled and emitted fence sequence numbers for each +kernel driver managed ring. Fences are associated with submissions +to the engine. Emitted fences have been submitted to the ring +and signalled fences have been signalled by the GPU. Rings with a +larger emitted fence value have outstanding work that is still being +processed by the engine that owns that ring. When the emitted and +signalled fence values are equal, the ring is idle. + +amdgpu_gem_info +--------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated. This lists the buffer size, pool (VRAM, GTT, etc.), and buffer +attributes (CPU access required, CPU cache attributes, etc.). + +amdgpu_vm_info +-------------- + +Lists all of the PIDs using the GPU and the GPU buffers that they have +allocated as well as the status of those buffers relative to that process' +GPU virtual address space (e.g., evicted, idle, invalidated, etc.). + +amdgpu_sa_info +-------------- + +Prints out all of the suballocations (sa) by the suballocation manager in the +kernel driver. Prints the GPU address, size, and fence info associated +with each suballocation. The suballocations are used internally within +the kernel driver for various things. + +amdgpu__mm +---------------- + +Prints TTM information about the memory pool . + +amdgpu_vram +----------- + +Provides direct access to VRAM. Used by tools like UMR to inspect +objects in VRAM. + +amdgpu_iomem +------------ + +Provides direct access to GTT memory. Used by tools like UMR to inspect +GTT memory. + +amdgpu_regs_* +------------- + +Provides direct access to various register aperatures on the GPU. Used +by tools like UMR to access GPU registers. + +amdgpu_regs2 +------------ + +Provides an IOCTL interface used by UMR for interacting with GPU registers. + + +amdgpu_sensors +-------------- + +Provides an interface to query GPU power metrics (temperature, average +power, etc.). Used by tools like UMR to query GPU power metrics. + + +amdgpu_gca_config +----------------- + +Provides an interface to query GPU details (Graphics/Compute Array config, +PCI config, GPU family, etc.). Used by tools like UMR to query GPU details. + +amdgpu_wave +----------- + +Used to query GFX/compute wave information from the hardware. Used by tools +like UMR to query GFX/compute wave information. + +amdgpu_gpr +---------- + +Used to query GFX/compute GPR (General Purpose Register) information from the +hardware. Used by tools like UMR to query GPRs when debugging shaders. + +amdgpu_gprwave +-------------- + +Provides an IOCTL interface used by UMR for interacting with shader waves. + +amdgpu_fw_attestation +--------------------- + +Provides an interface for reading back firmware attestation records. diff --git a/Documentation/gpu/amdgpu/debugging.rst b/Documentation/gpu/amdgpu/debugging.rst index e75f97d0e4ea..7cbfea0606e1 100644 --- a/Documentation/gpu/amdgpu/debugging.rst +++ b/Documentation/gpu/amdgpu/debugging.rst @@ -2,6 +2,13 @@ GPU Debugging =============== +General Debugging Options +========================= + +The DebugFS section provides documentation on a number files to aid in debugging +issues on the GPU. + + GPUVM Debugging =============== diff --git a/Documentation/gpu/amdgpu/display/dc-debug.rst b/Documentation/gpu/amdgpu/display/dc-debug.rst index 013f63b271f3..605dca21f4ae 100644 --- a/Documentation/gpu/amdgpu/display/dc-debug.rst +++ b/Documentation/gpu/amdgpu/display/dc-debug.rst @@ -154,7 +154,7 @@ of the display parameters, but the userspace might also cause this issue. One way to identify the source of the problem is to take a screenshot or make a desktop video capture when the problem happens; after checking the screenshot/video recording, if you don't see any of the artifacts, it means -that the issue is likely on the the driver side. If you can still see the +that the issue is likely on the driver side. If you can still see the problem in the data collected, it is an issue that probably happened during rendering, and the display code just got the framebuffer already corrupted. diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 32723a925377..81256318e93c 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -67,36 +67,66 @@ GC (Graphics and Compute) This is the graphics and compute engine, i.e., the block that encompasses the 3D pipeline and and shader blocks. This is by far the largest block on the GPU. The 3D pipeline has tons of sub-blocks. In - addition to that, it also contains the CP microcontrollers (ME, PFP, - CE, MEC) and the RLC microcontroller. It's exposed to userspace for - user mode drivers (OpenGL, Vulkan, OpenCL, etc.) + addition to that, it also contains the CP microcontrollers (ME, PFP, CE, + MEC) and the RLC microcontroller. It's exposed to userspace for user mode + drivers (OpenGL, Vulkan, OpenCL, etc.). More details in :ref:`Graphics (GFX) + and Compute `. VCN (Video Core Next) This is the multi-media engine. It handles video and image encode and decode. It's exposed to userspace for user mode drivers (VA-API, OpenMAX, etc.) -Graphics and Compute Microcontrollers -------------------------------------- +.. _pipes-and-queues-description: -CP (Command Processor) - The name for the hardware block that encompasses the front end of the - GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers - (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers - provides the driver interface to interact with the GFX/Compute engine. +GFX, Compute, and SDMA Overall Behavior +======================================= - MEC (MicroEngine Compute) - This is the microcontroller that controls the compute queues on the - GFX/compute engine. +.. note:: For simplicity, whenever the term block is used in this section, it + means GFX, Compute, and SDMA. - MES (MicroEngine Scheduler) - This is a new engine for managing queues. This is currently unused. +GFX, Compute and SDMA share a similar form of operation that can be abstracted +to facilitate understanding of the behavior of these blocks. See the figure +below illustrating the common components of these blocks: -RLC (RunList Controller) - This is another microcontroller in the GFX/Compute engine. It handles - power management related functionality within the GFX/Compute engine. - The name is a vestige of old hardware where it was originally added - and doesn't really have much relation to what the engine does now. +.. kernel-figure:: pipe_and_queue_abstraction.svg + +In the central part of this figure, you can see two hardware elements, one called +**Pipes** and another called **Queues**; it is important to highlight that Queues +must be associated with a Pipe and vice-versa. Every specific hardware IP may have +a different number of Pipes and, in turn, a different number of Queues; for +example, GFX 11 has two Pipes and two Queues per Pipe for the GFX front end. + +Pipe is the hardware that processes the instructions available in the Queues; +in other words, it is a thread executing the operations inserted in the Queue. +One crucial characteristic of Pipes is that they can only execute one Queue at +a time; no matter if the hardware has multiple Queues in the Pipe, it only runs +one Queue per Pipe. + +Pipes have the mechanics of swapping between queues at the hardware level. +Nonetheless, they only make use of Queues that are considered mapped. Pipes can +switch between queues based on any of the following inputs: + +1. Command Stream; +2. Packet by Packet; +3. Other hardware requests the change (e.g., MES). + +Queues within Pipes are defined by the Hardware Queue Descriptors (HQD). +Associated with the HQD concept, we have the Memory Queue Descriptor (MQD), +which is responsible for storing information about the state of each of the +available Queues in the memory. The state of a Queue contains information such +as the GPU virtual address of the queue itself, save areas, doorbell, etc. The +MQD also stores the HQD registers, which are vital for activating or +deactivating a given Queue. The scheduling firmware (e.g., MES) is responsible +for loading HQDs from MQDs and vice versa. + +The Queue-switching process can also happen with the firmware requesting the +preemption or unmapping of a Queue. The firmware waits for the HQD_ACTIVE bit +to change to low before saving the state into the MQD. To make a different +Queue become active, the firmware copies the MQD state into the HQD registers +and loads any additional state. Finally, it sets the HQD_ACTIVE bit to high to +indicate that the queue is active. The Pipe will then execute work from active +Queues. Driver Structure ================ @@ -110,7 +140,8 @@ Some useful constructs: KIQ (Kernel Interface Queue) This is a control queue used by the kernel driver to manage other gfx and compute queues on the GFX/compute engine. You can use it to - map/unmap additional queues, etc. + map/unmap additional queues, etc. This is replaced by MES on + GFX 11 and newer hardware. IB (Indirect Buffer) A command buffer for a particular engine. Rather than writing diff --git a/Documentation/gpu/amdgpu/driver-misc.rst b/Documentation/gpu/amdgpu/driver-misc.rst index e40e15f89fd3..25b0c857816e 100644 --- a/Documentation/gpu/amdgpu/driver-misc.rst +++ b/Documentation/gpu/amdgpu/driver-misc.rst @@ -50,23 +50,6 @@ board_info .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c :doc: board_info -Accelerated Processing Units (APU) Info ---------------------------------------- - -.. csv-table:: - :header-rows: 1 - :widths: 3, 2, 2, 1, 1, 1, 1 - :file: ./apu-asic-info-table.csv - -Discrete GPU Info ------------------ - -.. csv-table:: - :header-rows: 1 - :widths: 3, 2, 2, 1, 1, 1 - :file: ./dgpu-asic-info-table.csv - - GPU Memory Usage Information ============================ diff --git a/Documentation/gpu/amdgpu/gc/index.rst b/Documentation/gpu/amdgpu/gc/index.rst new file mode 100644 index 000000000000..ff6e9ef5cbee --- /dev/null +++ b/Documentation/gpu/amdgpu/gc/index.rst @@ -0,0 +1,52 @@ +.. _amdgpu-gc: + +======================================== + drm/amdgpu - Graphics and Compute (GC) +======================================== + +The relationship between the CPU and GPU can be described as the +producer-consumer problem, where the CPU fills out a buffer with operations +(producer) to be executed by the GPU (consumer). The requested operations in +the buffer are called Command Packets, which can be summarized as a compressed +way of transmitting command information to the graphics controller. + +The component that acts as the front end between the CPU and the GPU is called +the Command Processor (CP). This component is responsible for providing greater +flexibility to the GC since CP makes it possible to program various aspects of +the GPU pipeline. CP also coordinates the communication between the CPU and GPU +via a mechanism named **Ring Buffers**, where the CPU appends information to +the buffer while the GPU removes operations. It is relevant to highlight that a +CPU can add a pointer to the Ring Buffer that points to another region of +memory outside the Ring Buffer, and CP can handle it; this mechanism is called +**Indirect Buffer (IB)**. CP receives and parses the Command Streams (CS), and +writes the operations to the correct hardware blocks. + +Graphics (GFX) and Compute Microcontrollers +------------------------------------------- + +GC is a large block, and as a result, it has multiple firmware associated with +it. Some of them are: + +CP (Command Processor) + The name for the hardware block that encompasses the front end of the + GFX/Compute pipeline. Consists mainly of a bunch of microcontrollers + (PFP, ME, CE, MEC). The firmware that runs on these microcontrollers + provides the driver interface to interact with the GFX/Compute engine. + + MEC (MicroEngine Compute) + This is the microcontroller that controls the compute queues on the + GFX/compute engine. + + MES (MicroEngine Scheduler) + This is the engine for managing queues. For more details check + :ref:`MicroEngine Scheduler (MES) `. + +RLC (RunList Controller) + This is another microcontroller in the GFX/Compute engine. It handles + power management related functionality within the GFX/Compute engine. + The name is a vestige of old hardware where it was originally added + and doesn't really have much relation to what the engine does now. + +.. toctree:: + + mes.rst diff --git a/Documentation/gpu/amdgpu/gc/mes.rst b/Documentation/gpu/amdgpu/gc/mes.rst new file mode 100644 index 000000000000..b99eb211b179 --- /dev/null +++ b/Documentation/gpu/amdgpu/gc/mes.rst @@ -0,0 +1,38 @@ +.. _amdgpu-mes: + +============================= + MicroEngine Scheduler (MES) +============================= + +.. note:: + Queue and ring buffer are used as a synonymous. + +.. note:: + This section assumes that you are familiar with the concept of Pipes, Queues, and GC. + If not, check :ref:`GFX, Compute, and SDMA Overall Behavior` + and :ref:`drm/amdgpu - Graphics and Compute (GC) `. + +Every GFX has a pipe component with one or more hardware queues. Pipes can +switch between queues depending on certain conditions, and one of the +components that can request a queue switch to a pipe is the MicroEngine +Scheduler (MES). Whenever the driver is initialized, it creates one MQD per +hardware queue, and then the MQDs are handed to the MES firmware for mapping +to: + +1. Kernel Queues (legacy): This queue is statically mapped to HQDs and never + preempted. Even though this is a legacy feature, it is the current default, and + most existing hardware supports it. When an application submits work to the + kernel driver, it submits all of the application command buffers to the kernel + queues. The CS IOCTL takes the command buffer from the applications and + schedules them on the kernel queue. + +2. User Queues: These queues are dynamically mapped to the HQDs. Regarding the + utilization of User Queues, the userspace application will create its user + queues and submit work directly to its user queues with no need to IOCTL for + each submission and no need to share a single kernel queue. + +In terms of User Queues, MES can dynamically map them to the HQD. If there are +more MQDs than HQDs, the MES firmware will preempt other user queues to make +sure each queues get a time slice; in other words, MES is a microcontroller +that handles the mapping and unmapping of MQDs into HQDs, as well as the +priorities and oversubscription of MQDs. diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 302d039928ee..bb2894b5edaf 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -7,8 +7,10 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. .. toctree:: - module-parameters driver-core + amd-hardware-list-info + module-parameters + gc/index display/index flashing xgmi @@ -16,5 +18,6 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. thermal driver-misc debugging + debugfs process-isolation amdgpu-glossary diff --git a/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg new file mode 100644 index 000000000000..0df3c6b3000b --- /dev/null +++ b/Documentation/gpu/amdgpu/pipe_and_queue_abstraction.svg @@ -0,0 +1,1279 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[0] + MQD + Queue[0] + Queue[n] + ... + + + + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[1] + Queue[0] + Queue[n] + ... + + + + + + + + + + + + + + . . . + + + + + + + + + + + + . . . + + . . . + Pipe[n] + Queue[0] + Queue[n] + ... + + ... + + Hardware Block + EXECUTION + + Memory + + + + + + + + e.g.,:queue[0] + + + + + e.g.,:queue[4] + + + + + e.g.,:queue[n] + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + + HQD + + + + Registers + + MQD + MQD + MQD + MQD + MQD + ... + + + HQD RegistersQueue Address in the GPUDoorbell... + + SWITCH QUEUE:WAIT FOR HQD_ACTIVE = 0SAVE QUEUE STATE TO THE MQDCOPY NEW MQD STATESET HQD_ACTIVE = 1 + + Firmware + + + + diff --git a/Documentation/gpu/automated_testing.rst b/Documentation/gpu/automated_testing.rst index 6d7c6086034d..62aa3ede02a5 100644 --- a/Documentation/gpu/automated_testing.rst +++ b/Documentation/gpu/automated_testing.rst @@ -115,6 +115,10 @@ created (eg. https://gitlab.freedesktop.org/janedoe/linux/-/pipelines) 5. The various jobs will be run and when the pipeline is finished, all jobs should be green unless a regression has been found. +6. Warnings in the pipeline indicate that lockdep +(see Documentation/locking/lockdep-design.rst) issues have been detected +during the tests. + How to update test expectations =============================== diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 971cdb4816fc..1f15a8ca1265 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -27,3 +27,8 @@ drm/xe uAPI =========== .. kernel-doc:: include/uapi/drm/xe_drm.h + +drm/asahi uAPI +================ + +.. kernel-doc:: include/uapi/drm/asahi_drm.h diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index b4ee25af1702..5139705089f2 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -233,6 +233,21 @@ Panel Self Refresh Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_self_refresh_helper.c :export: +HDMI Atomic State Helpers +========================= + +Overview +-------- + +.. kernel-doc:: drivers/gpu/drm/display/drm_hdmi_state_helper.c + :doc: hdmi helpers + +Functions Reference +------------------- + +.. kernel-doc:: drivers/gpu/drm/display/drm_hdmi_state_helper.c + :export: + HDCP Helper Functions Reference =============================== diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst index 0f34131ccc27..b8c801e0068c 100644 --- a/Documentation/gpu/nouveau.rst +++ b/Documentation/gpu/nouveau.rst @@ -27,3 +27,6 @@ GSP Support .. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c :doc: GSP message queue element + +.. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h + :doc: GSP message handling policy diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index ca08377d3b73..8a459fc08812 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -102,7 +102,13 @@ Usage: let boot0 = Boot0::read(&bar); pr_info!("Revision: {}\n", boot0.revision()); +Note: a work-in-progress implementation currently resides in +`drivers/gpu/nova-core/regs/macros.rs` and is used in nova-core. It would be +nice to improve it (possibly using proc macros) and move it to the `kernel` +crate so it can be used by other components as well. + | Complexity: Advanced +| Contact: Alexandre Courbot Delay / Sleep abstractions -------------------------- @@ -190,16 +196,6 @@ Rust abstraction for debugfs APIs. | Reference: Export GSP log buffers | Complexity: Intermediate -Vec extensions --------------- - -Implement ``Vec::truncate`` and ``Vec::resize``. - -Currently this is used for some experimental code to parse the vBIOS. - -| Reference vBIOS support -| Complexity: Beginner - GPU (general) ============= diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 256d0d1cb216..c57777a24e03 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -441,14 +441,15 @@ Contact: Thomas Zimmermann Level: Intermediate -Request memory regions in all drivers -------------------------------------- +Request memory regions in all fbdev drivers +-------------------------------------------- -Go through all drivers and add code to request the memory regions that the -driver uses. This requires adding calls to request_mem_region(), +Old/ancient fbdev drivers do not request their memory properly. +Go through these drivers and add code to request the memory regions +that the driver uses. This requires adding calls to request_mem_region(), pci_request_region() or similar functions. Use helpers for managed cleanup -where possible. - +where possible. Problematic areas include hardware that has exclusive ranges +like VGA. VGA16fb does not request the range as it is expected. Drivers are pretty bad at doing this and there used to be conflicts among DRM and fbdev drivers. Still, it's the correct thing to do. diff --git a/Documentation/gpu/vgaarbiter.rst b/Documentation/gpu/vgaarbiter.rst index bde3c0afb059..d1e953712cc2 100644 --- a/Documentation/gpu/vgaarbiter.rst +++ b/Documentation/gpu/vgaarbiter.rst @@ -11,9 +11,9 @@ Section 7, Legacy Devices. The Resource Access Control (RAC) module inside the X server [0] existed for the legacy VGA arbitration task (besides other bus management tasks) when more -than one legacy device co-exists on the same machine. But the problem happens +than one legacy device co-exist on the same machine. But the problem happens when these devices are trying to be accessed by different userspace clients -(e.g. two server in parallel). Their address assignments conflict. Moreover, +(e.g. two servers in parallel). Their address assignments conflict. Moreover, ideally, being a userspace application, it is not the role of the X server to control bus resources. Therefore an arbitration scheme outside of the X server is needed to control the sharing of these resources. This document introduces @@ -106,7 +106,7 @@ In-kernel interface libpciaccess ------------ -To use the vga arbiter char device it was implemented an API inside the +To use the vga arbiter char device, an API was implemented inside the libpciaccess library. One field was added to struct pci_device (each device on the system):: diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 92cfb25e64d3..b2369561f24e 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -25,3 +25,4 @@ DG2, etc is provided to prototype the driver. xe_debugging xe_devcoredump xe-drm-usage-stats.rst + xe_configfs diff --git a/Documentation/gpu/xe/xe_configfs.rst b/Documentation/gpu/xe/xe_configfs.rst new file mode 100644 index 000000000000..9b9d941eb20e --- /dev/null +++ b/Documentation/gpu/xe/xe_configfs.rst @@ -0,0 +1,10 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +.. _xe_configfs: + +============ +Xe Configfs +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_configfs.c + :doc: Xe Configfs diff --git a/Documentation/gpu/xe/xe_firmware.rst b/Documentation/gpu/xe/xe_firmware.rst index afcb561cd37d..5d23e9f27391 100644 --- a/Documentation/gpu/xe/xe_firmware.rst +++ b/Documentation/gpu/xe/xe_firmware.rst @@ -31,6 +31,12 @@ GuC Power Conservation (PC) .. kernel-doc:: drivers/gpu/drm/xe/xe_guc_pc.c :doc: GuC Power Conservation (PC) +PCIe Gen5 Limitations +===================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_device_sysfs.c + :doc: PCIe Gen5 Limitations + Internal API ============ diff --git a/Documentation/gpu/xe/xe_pcode.rst b/Documentation/gpu/xe/xe_pcode.rst index d2e22cc45061..5937ef3599b0 100644 --- a/Documentation/gpu/xe/xe_pcode.rst +++ b/Documentation/gpu/xe/xe_pcode.rst @@ -12,3 +12,10 @@ Internal API .. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c :internal: + +================== +Boot Survivability +================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_survivability_mode.c + :doc: Xe Boot Survivability diff --git a/MAINTAINERS b/MAINTAINERS index 84d0fc2cea42..f847bf173b0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2341,6 +2341,7 @@ F: drivers/watchdog/apple_wdt.c F: include/dt-bindings/interrupt-controller/apple-aic.h F: include/dt-bindings/pinctrl/apple.h F: include/linux/soc/apple/* +F: include/uapi/drm/asahi_drm.h ARM/ARTPEC MACHINE SUPPORT M: Jesper Nilsson @@ -3916,6 +3917,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h +F: rust/helpers/auxiliary.c +F: rust/kernel/auxiliary.rs +F: samples/rust/rust_driver_auxiliary.rs AUXILIARY DISPLAY DRIVERS M: Andy Shevchenko @@ -7432,8 +7436,7 @@ M: Javier Martinez Canillas L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git -F: drivers/gpu/drm/tiny/ofdrm.c -F: drivers/gpu/drm/tiny/simpledrm.c +F: drivers/gpu/drm/sysfb/ F: drivers/video/aperture.c F: drivers/video/nomodeset.c F: include/linux/aperture.h @@ -7574,6 +7577,7 @@ S: Maintained B: https://gitlab.freedesktop.org/drm/msm/-/issues T: git https://gitlab.freedesktop.org/drm/msm.git F: Documentation/devicetree/bindings/display/msm/gpu.yaml +F: Documentation/devicetree/bindings/opp/opp-v2-qcom-adreno.yaml F: drivers/gpu/drm/msm/adreno/ F: drivers/gpu/drm/msm/msm_gpu.* F: drivers/gpu/drm/msm/msm_gpu_devfreq.* @@ -7627,6 +7631,12 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c +DRM DRIVER FOR NOVATEK NT37801 PANELS +M: Krzysztof Kozlowski +S: Maintained +F: Documentation/devicetree/bindings/display/panel/novatek,nt37801.yaml +F: drivers/gpu/drm/panel/panel-novatek-nt37801.c + DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS M: Lyude Paul M: Danilo Krummrich @@ -7653,6 +7663,18 @@ T: git https://gitlab.freedesktop.org/drm/nova.git nova-next F: Documentation/gpu/nova/ F: drivers/gpu/nova-core/ +DRM DRIVER FOR NVIDIA GPUS [RUST] +M: Danilo Krummrich +L: nouveau@lists.freedesktop.org +S: Supported +Q: https://patchwork.freedesktop.org/project/nouveau/ +B: https://gitlab.freedesktop.org/drm/nova/-/issues +C: irc://irc.oftc.net/nouveau +T: git https://gitlab.freedesktop.org/drm/nova.git nova-next +F: Documentation/gpu/nova/ +F: drivers/gpu/drm/nova/ +F: include/uapi/drm/nova_drm.h + DRM DRIVER FOR OLIMEX LCD-OLINUXINO PANELS M: Stefan Mavrodiev S: Maintained @@ -7742,7 +7764,13 @@ M: David Lechner S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: Documentation/devicetree/bindings/display/sitronix,st7586.txt -F: drivers/gpu/drm/tiny/st7586.c +F: drivers/gpu/drm/sitronix/st7586.c + +DRM DRIVER FOR SITRONIX ST7571 PANELS +M: Marcus Folkesson +S: Maintained +F: Documentation/devicetree/bindings/display/sitronix,st7571.yaml +F: drivers/gpu/drm/sitronix/st7571-i2c.c DRM DRIVER FOR SITRONIX ST7701 PANELS M: Jagan Teki @@ -7763,7 +7791,7 @@ M: David Lechner S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: Documentation/devicetree/bindings/display/sitronix,st7735r.yaml -F: drivers/gpu/drm/tiny/st7735r.c +F: drivers/gpu/drm/sitronix/st7735r.c DRM DRIVER FOR SOLOMON SSD130X OLED DISPLAYS M: Javier Martinez Canillas @@ -7860,6 +7888,7 @@ F: Documentation/devicetree/bindings/display/ F: Documentation/devicetree/bindings/gpu/ F: Documentation/gpu/ F: drivers/gpu/ +F: rust/kernel/drm/ F: include/drm/ F: include/linux/vga* F: include/uapi/drm/ @@ -7876,6 +7905,7 @@ F: Documentation/devicetree/bindings/gpu/ F: Documentation/gpu/ F: drivers/gpu/drm/ F: drivers/gpu/vga/ +F: rust/kernel/drm/ F: include/drm/drm F: include/linux/vga* F: include/uapi/drm/ @@ -7915,8 +7945,8 @@ F: drivers/gpu/drm/ci/xfails/meson* F: drivers/gpu/drm/meson/ DRM DRIVERS FOR ATMEL HLCDC -M: Sam Ravnborg -M: Boris Brezillon +M: Manikandan Muralidharan +M: Dharma Balasubiramani L: dri-devel@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -8244,7 +8274,8 @@ F: drivers/gpu/drm/ttm/ F: include/drm/ttm/ DRM AUTOMATED TESTING -M: Helen Koike +M: Helen Koike +M: Vignesh Raman L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -12004,13 +12035,10 @@ F: drivers/gpio/gpio-tangier.c F: drivers/gpio/gpio-tangier.h INTEL GVT-g DRIVERS (Intel GPU Virtualization) -M: Zhenyu Wang -M: Zhi Wang -L: intel-gvt-dev@lists.freedesktop.org -L: intel-gfx@lists.freedesktop.org -S: Supported +R: Zhenyu Wang +R: Zhi Wang +S: Odd Fixes W: https://github.com/intel/gvt-linux/wiki -T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ INTEL HID EVENT DRIVER @@ -25940,6 +25968,7 @@ F: include/uapi/linux/virtio_gpio.h VIRTIO GPU DRIVER M: David Airlie M: Gerd Hoffmann +M: Dmitry Osipenko R: Gurchetan Singh R: Chia-I Wu L: dri-devel@lists.freedesktop.org diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 4936fa5b98ff..8eddf0c96098 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3752,60 +3752,83 @@ }; gpu_opp_table: opp-table { - compatible = "operating-points-v2"; + compatible = "operating-points-v2-adreno", "operating-points-v2"; + + opp-1250000000 { + opp-hz = /bits/ 64 <1250000000>; + opp-level = ; + opp-peak-kBps = <16500000>; + qcom,opp-acd-level = <0xa82a5ffd>; + }; + + opp-1175000000 { + opp-hz = /bits/ 64 <1175000000>; + opp-level = ; + opp-peak-kBps = <14398438>; + qcom,opp-acd-level = <0xa82a5ffd>; + }; opp-1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-level = ; - opp-peak-kBps = <16500000>; + opp-peak-kBps = <14398438>; + qcom,opp-acd-level = <0xa82a5ffd>; }; opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-level = ; opp-peak-kBps = <14398438>; + qcom,opp-acd-level = <0xa82b5ffd>; }; opp-925000000 { opp-hz = /bits/ 64 <925000000>; opp-level = ; opp-peak-kBps = <14398438>; + qcom,opp-acd-level = <0xa82b5ffd>; }; opp-800000000 { opp-hz = /bits/ 64 <800000000>; opp-level = ; opp-peak-kBps = <12449219>; + qcom,opp-acd-level = <0xa82c5ffd>; }; opp-744000000 { opp-hz = /bits/ 64 <744000000>; opp-level = ; opp-peak-kBps = <10687500>; + qcom,opp-acd-level = <0x882e5ffd>; }; opp-687000000 { opp-hz = /bits/ 64 <687000000>; opp-level = ; opp-peak-kBps = <8171875>; + qcom,opp-acd-level = <0x882e5ffd>; }; opp-550000000 { opp-hz = /bits/ 64 <550000000>; opp-level = ; opp-peak-kBps = <6074219>; + qcom,opp-acd-level = <0xc0285ffd>; }; opp-390000000 { opp-hz = /bits/ 64 <390000000>; opp-level = ; opp-peak-kBps = <3000000>; + qcom,opp-acd-level = <0xc0285ffd>; }; opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-level = ; opp-peak-kBps = <2136719>; + qcom,opp-acd-level = <0xc02b5ffd>; }; }; }; diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO index 5119bccd1917..ad8ac6e315b6 100644 --- a/drivers/accel/amdxdna/TODO +++ b/drivers/accel/amdxdna/TODO @@ -1,3 +1,2 @@ -- Add import and export BO support - Add debugfs support - Add debug BO support diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index 00d215ac866e..e04549f64d69 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -758,27 +758,42 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu static int aie2_populate_range(struct amdxdna_gem_obj *abo) { struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); - struct mm_struct *mm = abo->mem.notifier.mm; - struct hmm_range range = { 0 }; + struct amdxdna_umap *mapp; unsigned long timeout; + struct mm_struct *mm; + bool found; int ret; - XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", - abo->mem.userptr, abo->mem.size); - range.notifier = &abo->mem.notifier; - range.start = abo->mem.userptr; - range.end = abo->mem.userptr + abo->mem.size; - range.hmm_pfns = abo->mem.pfns; - range.default_flags = HMM_PFN_REQ_FAULT; - - if (!mmget_not_zero(mm)) - return -EFAULT; - timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); again: - range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); + found = false; + down_write(&xdna->notifier_lock); + list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (mapp->invalid) { + found = true; + break; + } + } + + if (!found) { + abo->mem.map_invalid = false; + up_write(&xdna->notifier_lock); + return 0; + } + kref_get(&mapp->refcnt); + up_write(&xdna->notifier_lock); + + XDNA_DBG(xdna, "populate memory range %lx %lx", + mapp->vma->vm_start, mapp->vma->vm_end); + mm = mapp->notifier.mm; + if (!mmget_not_zero(mm)) { + amdxdna_umap_put(mapp); + return -EFAULT; + } + + mapp->range.notifier_seq = mmu_interval_read_begin(&mapp->notifier); mmap_read_lock(mm); - ret = hmm_range_fault(&range); + ret = hmm_range_fault(&mapp->range); mmap_read_unlock(mm); if (ret) { if (time_after(jiffies, timeout)) { @@ -786,21 +801,27 @@ again: goto put_mm; } - if (ret == -EBUSY) + if (ret == -EBUSY) { + amdxdna_umap_put(mapp); goto again; + } goto put_mm; } - down_read(&xdna->notifier_lock); - if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { - up_read(&xdna->notifier_lock); + down_write(&xdna->notifier_lock); + if (mmu_interval_read_retry(&mapp->notifier, mapp->range.notifier_seq)) { + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); goto again; } - abo->mem.map_invalid = false; - up_read(&xdna->notifier_lock); + mapp->invalid = false; + up_write(&xdna->notifier_lock); + amdxdna_umap_put(mapp); + goto again; put_mm: + amdxdna_umap_put(mapp); mmput(mm); return ret; } @@ -908,10 +929,6 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, struct drm_gem_object *gobj = to_gobj(abo); long ret; - down_write(&xdna->notifier_lock); - abo->mem.map_invalid = true; - mmu_interval_set_seq(&abo->mem.notifier, cur_seq); - up_write(&xdna->notifier_lock); ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, true, MAX_SCHEDULE_TIMEOUT); if (!ret || ret == -ERESTARTSYS) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index bf4219e32cc1..82412eec9a4b 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, if (!payload) return -EINVAL; - if (!slot_cf_has_space(offset, payload_len)) + if (!slot_has_space(*buf, offset, payload_len)) return -ENOSPC; buf->cu_idx = cu_idx; @@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) return -EINVAL; - if (!slot_dpu_has_space(offset, arg_sz)) + if (!slot_has_space(*buf, offset, arg_sz)) return -ENOSPC; buf->inst_buf_addr = sn->buffer; @@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, memcpy(buf->args, sn->prop_args, arg_sz); /* Accurate buf size to hint firmware to do necessary copy */ - *size += sizeof(*buf) + arg_sz; + *size = sizeof(*buf) + arg_sz; return 0; } diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 4e02e744b470..6df9065b13f6 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -319,18 +319,16 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_cf_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_execbuf_cf, args[0])) +#define slot_has_space(slot, offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ + sizeof(typeof(slot))) + struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; __u32 arg_cnt; __u32 args[] __counted_by(arg_cnt); }; -#define slot_dpu_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_dpu, args[0])) struct cmd_chain_slot_dpu { __u64 inst_buf_addr; __u32 inst_size; diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c index 43442b9e273b..be073224bd69 100644 --- a/drivers/accel/amdxdna/amdxdna_ctx.c +++ b/drivers/accel/amdxdna/amdxdna_ctx.c @@ -496,11 +496,11 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, struct amdxdna_drm_exec_cmd *args) { struct amdxdna_dev *xdna = client->xdna; - u32 *arg_bo_hdls; + u32 *arg_bo_hdls = NULL; u32 cmd_bo_hdl; int ret; - if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { + if (args->arg_count > MAX_ARG_COUNT) { XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); return -EINVAL; } @@ -512,14 +512,16 @@ static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, } cmd_bo_hdl = (u32)args->cmd_handles; - arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); - if (!arg_bo_hdls) - return -ENOMEM; - ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), - args->arg_count * sizeof(u32)); - if (ret) { - ret = -EFAULT; - goto free_cmd_bo_hdls; + if (args->arg_count) { + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); + if (!arg_bo_hdls) + return -ENOMEM; + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), + args->arg_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_cmd_bo_hdls; + } } ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 606433d73236..26831ec69f89 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -9,7 +9,10 @@ #include #include #include +#include +#include #include +#include #include #include "amdxdna_ctx.h" @@ -18,6 +21,8 @@ #define XDNA_MAX_CMD_BO_SIZE SZ_32K +MODULE_IMPORT_NS("DMA_BUF"); + static int amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap) { @@ -55,6 +60,306 @@ amdxdna_gem_insert_node_locked(struct amdxdna_gem_obj *abo, bool use_vmap) return 0; } +static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct amdxdna_umap *mapp = container_of(mni, struct amdxdna_umap, notifier); + struct amdxdna_gem_obj *abo = mapp->abo; + struct amdxdna_dev *xdna; + + xdna = to_xdna_dev(to_gobj(abo)->dev); + XDNA_DBG(xdna, "Invalidating range 0x%lx, 0x%lx, type %d", + mapp->vma->vm_start, mapp->vma->vm_end, abo->type); + + if (!mmu_notifier_range_blockable(range)) + return false; + + down_write(&xdna->notifier_lock); + abo->mem.map_invalid = true; + mapp->invalid = true; + mmu_interval_set_seq(&mapp->notifier, cur_seq); + up_write(&xdna->notifier_lock); + + xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); + + if (range->event == MMU_NOTIFY_UNMAP) { + down_write(&xdna->notifier_lock); + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + up_write(&xdna->notifier_lock); + } + + return true; +} + +static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = { + .invalidate = amdxdna_hmm_invalidate, +}; + +static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + struct amdxdna_umap *mapp; + + down_read(&xdna->notifier_lock); + list_for_each_entry(mapp, &abo->mem.umap_list, node) { + if (!vma || mapp->vma == vma) { + if (!mapp->unmapped) { + queue_work(xdna->notifier_wq, &mapp->hmm_unreg_work); + mapp->unmapped = true; + } + if (vma) + break; + } + } + up_read(&xdna->notifier_lock); +} + +static void amdxdna_umap_release(struct kref *ref) +{ + struct amdxdna_umap *mapp = container_of(ref, struct amdxdna_umap, refcnt); + struct vm_area_struct *vma = mapp->vma; + struct amdxdna_dev *xdna; + + mmu_interval_notifier_remove(&mapp->notifier); + if (is_import_bo(mapp->abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_clear_unevictable(vma->vm_file->f_mapping); + + xdna = to_xdna_dev(to_gobj(mapp->abo)->dev); + down_write(&xdna->notifier_lock); + list_del(&mapp->node); + up_write(&xdna->notifier_lock); + + kvfree(mapp->range.hmm_pfns); + kfree(mapp); +} + +void amdxdna_umap_put(struct amdxdna_umap *mapp) +{ + kref_put(&mapp->refcnt, amdxdna_umap_release); +} + +static void amdxdna_hmm_unreg_work(struct work_struct *work) +{ + struct amdxdna_umap *mapp = container_of(work, struct amdxdna_umap, + hmm_unreg_work); + + amdxdna_umap_put(mapp); +} + +static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long len = vma->vm_end - vma->vm_start; + unsigned long addr = vma->vm_start; + struct amdxdna_umap *mapp; + u32 nr_pages; + int ret; + + if (!xdna->dev_info->ops->hmm_invalidate) + return 0; + + mapp = kzalloc(sizeof(*mapp), GFP_KERNEL); + if (!mapp) + return -ENOMEM; + + nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; + mapp->range.hmm_pfns = kvcalloc(nr_pages, sizeof(*mapp->range.hmm_pfns), + GFP_KERNEL); + if (!mapp->range.hmm_pfns) { + ret = -ENOMEM; + goto free_map; + } + + ret = mmu_interval_notifier_insert_locked(&mapp->notifier, + current->mm, + addr, + len, + &amdxdna_hmm_ops); + if (ret) { + XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret); + goto free_pfns; + } + + mapp->range.notifier = &mapp->notifier; + mapp->range.start = vma->vm_start; + mapp->range.end = vma->vm_end; + mapp->range.default_flags = HMM_PFN_REQ_FAULT; + mapp->vma = vma; + mapp->abo = abo; + kref_init(&mapp->refcnt); + + if (abo->mem.userptr == AMDXDNA_INVALID_ADDR) + abo->mem.userptr = addr; + INIT_WORK(&mapp->hmm_unreg_work, amdxdna_hmm_unreg_work); + if (is_import_bo(abo) && vma->vm_file && vma->vm_file->f_mapping) + mapping_set_unevictable(vma->vm_file->f_mapping); + + down_write(&xdna->notifier_lock); + list_add_tail(&mapp->node, &abo->mem.umap_list); + up_write(&xdna->notifier_lock); + + return 0; + +free_pfns: + kvfree(mapp->range.hmm_pfns); +free_map: + kfree(mapp); + return ret; +} + +static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); + unsigned long num_pages = vma_pages(vma); + unsigned long offset = 0; + int ret; + + if (!is_import_bo(abo)) { + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) { + XDNA_ERR(xdna, "Failed shmem mmap %d", ret); + return ret; + } + + /* The buffer is based on memory pages. Fix the flag. */ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages %d", ret); + vma->vm_ops->close(vma); + return ret; + } + + return 0; + } + + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0); + if (ret) { + XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret); + return ret; + } + + do { + vm_fault_t fault_ret; + + fault_ret = handle_mm_fault(vma, vma->vm_start + offset, + FAULT_FLAG_WRITE, NULL); + if (fault_ret & VM_FAULT_ERROR) { + vma->vm_ops->close(vma); + XDNA_ERR(xdna, "Fault in page failed"); + return -EFAULT; + } + + offset += PAGE_SIZE; + } while (--num_pages); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + drm_gem_object_put(to_gobj(abo)); + + return 0; +} + +static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj, + struct vm_area_struct *vma) +{ + struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + int ret; + + ret = amdxdna_hmm_register(abo, vma); + if (ret) + return ret; + + ret = amdxdna_insert_pages(abo, vma); + if (ret) { + XDNA_ERR(xdna, "Failed insert pages, ret %d", ret); + goto hmm_unreg; + } + + XDNA_DBG(xdna, "BO map_offset 0x%llx type %d userptr 0x%lx size 0x%lx", + drm_vma_node_offset_addr(&gobj->vma_node), abo->type, + vma->vm_start, gobj->size); + return 0; + +hmm_unreg: + amdxdna_hmm_unregister(abo, vma); + return ret; +} + +static int amdxdna_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + struct drm_gem_object *gobj = dma_buf->priv; + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + unsigned long num_pages = vma_pages(vma); + int ret; + + vma->vm_ops = &drm_gem_shmem_vm_ops; + vma->vm_private_data = gobj; + + drm_gem_object_get(gobj); + ret = drm_gem_shmem_mmap(&abo->base, vma); + if (ret) + goto put_obj; + + /* The buffer is based on memory pages. Fix the flag. */ + vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); + ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, + &num_pages); + if (ret) + goto close_vma; + + return 0; + +close_vma: + vma->vm_ops->close(vma); +put_obj: + drm_gem_object_put(gobj); + return ret; +} + +static const struct dma_buf_ops amdxdna_dmabuf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = amdxdna_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &amdxdna_dmabuf_ops; + exp_info.size = gobj->size; + exp_info.flags = flags; + exp_info.priv = gobj; + exp_info.resv = gobj->resv; + + return drm_gem_dmabuf_export(gobj->dev, &exp_info); +} + +static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo) +{ + dma_buf_unmap_attachment_unlocked(abo->attach, abo->base.sgt, DMA_BIDIRECTIONAL); + dma_buf_detach(abo->dma_buf, abo->attach); + dma_buf_put(abo->dma_buf); + drm_gem_object_release(to_gobj(abo)); + kfree(abo); +} + static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) { struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); @@ -62,6 +367,10 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) struct iosys_map map = IOSYS_MAP_INIT_VADDR(abo->mem.kva); XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, abo->mem.dev_addr); + + amdxdna_hmm_unregister(abo, NULL); + flush_workqueue(xdna->notifier_wq); + if (abo->pinned) amdxdna_gem_unpin(abo); @@ -81,8 +390,14 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) if (abo->type == AMDXDNA_BO_DEV_HEAP) drm_mm_takedown(&abo->mm); - drm_gem_vunmap_unlocked(gobj, &map); + drm_gem_vunmap(gobj, &map); mutex_destroy(&abo->lock); + + if (is_import_bo(abo)) { + amdxdna_imported_obj_free(abo); + return; + } + drm_gem_shmem_free(&abo->base); } @@ -90,127 +405,6 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { .free = amdxdna_gem_obj_free, }; -static bool amdxdna_hmm_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct amdxdna_gem_obj *abo = container_of(mni, struct amdxdna_gem_obj, - mem.notifier); - struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); - - XDNA_DBG(xdna, "Invalid range 0x%llx, 0x%lx, type %d", - abo->mem.userptr, abo->mem.size, abo->type); - - if (!mmu_notifier_range_blockable(range)) - return false; - - xdna->dev_info->ops->hmm_invalidate(abo, cur_seq); - - return true; -} - -static const struct mmu_interval_notifier_ops amdxdna_hmm_ops = { - .invalidate = amdxdna_hmm_invalidate, -}; - -static void amdxdna_hmm_unregister(struct amdxdna_gem_obj *abo) -{ - struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); - - if (!xdna->dev_info->ops->hmm_invalidate) - return; - - mmu_interval_notifier_remove(&abo->mem.notifier); - kvfree(abo->mem.pfns); - abo->mem.pfns = NULL; -} - -static int amdxdna_hmm_register(struct amdxdna_gem_obj *abo, unsigned long addr, - size_t len) -{ - struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); - u32 nr_pages; - int ret; - - if (!xdna->dev_info->ops->hmm_invalidate) - return 0; - - if (abo->mem.pfns) - return -EEXIST; - - nr_pages = (PAGE_ALIGN(addr + len) - (addr & PAGE_MASK)) >> PAGE_SHIFT; - abo->mem.pfns = kvcalloc(nr_pages, sizeof(*abo->mem.pfns), - GFP_KERNEL); - if (!abo->mem.pfns) - return -ENOMEM; - - ret = mmu_interval_notifier_insert_locked(&abo->mem.notifier, - current->mm, - addr, - len, - &amdxdna_hmm_ops); - if (ret) { - XDNA_ERR(xdna, "Insert mmu notifier failed, ret %d", ret); - kvfree(abo->mem.pfns); - } - abo->mem.userptr = addr; - - return ret; -} - -static int amdxdna_gem_obj_mmap(struct drm_gem_object *gobj, - struct vm_area_struct *vma) -{ - struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); - unsigned long num_pages; - int ret; - - ret = amdxdna_hmm_register(abo, vma->vm_start, gobj->size); - if (ret) - return ret; - - ret = drm_gem_shmem_mmap(&abo->base, vma); - if (ret) - goto hmm_unreg; - - num_pages = gobj->size >> PAGE_SHIFT; - /* Try to insert the pages */ - vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP); - ret = vm_insert_pages(vma, vma->vm_start, abo->base.pages, &num_pages); - if (ret) - XDNA_ERR(abo->client->xdna, "Failed insert pages, ret %d", ret); - - return 0; - -hmm_unreg: - amdxdna_hmm_unregister(abo); - return ret; -} - -static vm_fault_t amdxdna_gem_vm_fault(struct vm_fault *vmf) -{ - return drm_gem_shmem_vm_ops.fault(vmf); -} - -static void amdxdna_gem_vm_open(struct vm_area_struct *vma) -{ - drm_gem_shmem_vm_ops.open(vma); -} - -static void amdxdna_gem_vm_close(struct vm_area_struct *vma) -{ - struct drm_gem_object *gobj = vma->vm_private_data; - - amdxdna_hmm_unregister(to_xdna_obj(gobj)); - drm_gem_shmem_vm_ops.close(vma); -} - -static const struct vm_operations_struct amdxdna_gem_vm_ops = { - .fault = amdxdna_gem_vm_fault, - .open = amdxdna_gem_vm_open, - .close = amdxdna_gem_vm_close, -}; - static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { .free = amdxdna_gem_obj_free, .print_info = drm_gem_shmem_object_print_info, @@ -220,7 +414,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { .vmap = drm_gem_shmem_object_vmap, .vunmap = drm_gem_shmem_object_vunmap, .mmap = amdxdna_gem_obj_mmap, - .vm_ops = &amdxdna_gem_vm_ops, + .vm_ops = &drm_gem_shmem_vm_ops, + .export = amdxdna_gem_prime_export, }; static struct amdxdna_gem_obj * @@ -239,6 +434,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size) abo->mem.userptr = AMDXDNA_INVALID_ADDR; abo->mem.dev_addr = AMDXDNA_INVALID_ADDR; abo->mem.size = size; + INIT_LIST_HEAD(&abo->mem.umap_list); return abo; } @@ -258,6 +454,51 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size) return to_gobj(abo); } +struct drm_gem_object * +amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct amdxdna_gem_obj *abo; + struct drm_gem_object *gobj; + struct sg_table *sgt; + int ret; + + get_dma_buf(dma_buf); + + attach = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attach)) { + ret = PTR_ERR(attach); + goto put_buf; + } + + sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto fail_detach; + } + + gobj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(gobj)) { + ret = PTR_ERR(gobj); + goto fail_unmap; + } + + abo = to_xdna_obj(gobj); + abo->attach = attach; + abo->dma_buf = dma_buf; + + return gobj; + +fail_unmap: + dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); +fail_detach: + dma_buf_detach(dma_buf, attach); +put_buf: + dma_buf_put(dma_buf); + + return ERR_PTR(ret); +} + static struct amdxdna_gem_obj * amdxdna_drm_alloc_shmem(struct drm_device *dev, struct amdxdna_drm_create_bo *args, @@ -417,7 +658,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev, abo->type = AMDXDNA_BO_CMD; abo->client = filp->driver_priv; - ret = drm_gem_vmap_unlocked(to_gobj(abo), &map); + ret = drm_gem_vmap(to_gobj(abo), &map); if (ret) { XDNA_ERR(xdna, "Vmap cmd bo failed, ret %d", ret); goto release_obj; @@ -483,6 +724,9 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo) struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); int ret; + if (is_import_bo(abo)) + return 0; + switch (abo->type) { case AMDXDNA_BO_SHMEM: case AMDXDNA_BO_DEV_HEAP: @@ -515,6 +759,9 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo) void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo) { + if (is_import_bo(abo)) + return; + if (abo->type == AMDXDNA_BO_DEV) abo = abo->dev_heap; @@ -606,7 +853,9 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, goto put_obj; } - if (abo->type == AMDXDNA_BO_DEV) + if (is_import_bo(abo)) + drm_clflush_sg(abo->base.sgt); + else if (abo->type == AMDXDNA_BO_DEV) drm_clflush_pages(abo->mem.pages, abo->mem.nr_pages); else drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT); diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h index 8ccc0375dd9d..aee97e971d6d 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.h +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -6,6 +6,20 @@ #ifndef _AMDXDNA_GEM_H_ #define _AMDXDNA_GEM_H_ +#include + +struct amdxdna_umap { + struct vm_area_struct *vma; + struct mmu_interval_notifier notifier; + struct hmm_range range; + struct work_struct hmm_unreg_work; + struct amdxdna_gem_obj *abo; + struct list_head node; + struct kref refcnt; + bool invalid; + bool unmapped; +}; + struct amdxdna_mem { u64 userptr; void *kva; @@ -13,8 +27,7 @@ struct amdxdna_mem { size_t size; struct page **pages; u32 nr_pages; - struct mmu_interval_notifier notifier; - unsigned long *pfns; + struct list_head umap_list; bool map_invalid; }; @@ -31,9 +44,12 @@ struct amdxdna_gem_obj { struct amdxdna_gem_obj *dev_heap; /* For AMDXDNA_BO_DEV */ struct drm_mm_node mm_node; /* For AMDXDNA_BO_DEV */ u32 assigned_hwctx; + struct dma_buf *dma_buf; + struct dma_buf_attachment *attach; }; #define to_gobj(obj) (&(obj)->base.base) +#define is_import_bo(obj) ((obj)->attach) static inline struct amdxdna_gem_obj *to_xdna_obj(struct drm_gem_object *gobj) { @@ -47,8 +63,12 @@ static inline void amdxdna_gem_put_obj(struct amdxdna_gem_obj *abo) drm_gem_object_put(to_gobj(abo)); } +void amdxdna_umap_put(struct amdxdna_umap *mapp); + struct drm_gem_object * amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size); +struct drm_gem_object * +amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); struct amdxdna_gem_obj * amdxdna_drm_alloc_dev_bo(struct drm_device *dev, struct amdxdna_drm_create_bo *args, diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index f5b8497cf5ad..f2bf1d374cc7 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -226,6 +226,7 @@ const struct drm_driver amdxdna_drm_drv = { .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls), .gem_create_object = amdxdna_gem_create_object_cb, + .gem_prime_import = amdxdna_gem_prime_import, }; static const struct amdxdna_dev_info * @@ -266,12 +267,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) fs_reclaim_release(GFP_KERNEL); } + xdna->notifier_wq = alloc_ordered_workqueue("notifier_wq", 0); + if (!xdna->notifier_wq) + return -ENOMEM; + mutex_lock(&xdna->dev_lock); ret = xdna->dev_info->ops->init(xdna); mutex_unlock(&xdna->dev_lock); if (ret) { XDNA_ERR(xdna, "Hardware init failed, ret %d", ret); - return ret; + goto destroy_notifier_wq; } ret = amdxdna_sysfs_init(xdna); @@ -301,6 +306,8 @@ failed_dev_fini: mutex_lock(&xdna->dev_lock); xdna->dev_info->ops->fini(xdna); mutex_unlock(&xdna->dev_lock); +destroy_notifier_wq: + destroy_workqueue(xdna->notifier_wq); return ret; } @@ -310,6 +317,8 @@ static void amdxdna_remove(struct pci_dev *pdev) struct device *dev = &pdev->dev; struct amdxdna_client *client; + destroy_workqueue(xdna->notifier_wq); + pm_runtime_get_noresume(dev); pm_runtime_forbid(dev); diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index 37848a8d8031..ab79600911aa 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -6,6 +6,7 @@ #ifndef _AMDXDNA_PCI_DRV_H_ #define _AMDXDNA_PCI_DRV_H_ +#include #include #define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args) @@ -98,6 +99,7 @@ struct amdxdna_dev { struct list_head client_list; struct amdxdna_fw_ver fw_ver; struct rw_semaphore notifier_lock; /* for mmu notifier*/ + struct workqueue_struct *notifier_wq; }; /* diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index eff1d3ca075f..0e7748c5e117 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev) { int ret; + drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter)); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + /* Update boot params located at first 4KB of FW memory */ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem)); @@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID; vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID; atomic64_set(&vdev->unique_id_counter, 0); + atomic_set(&vdev->job_timeout_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 92753effb1c9..5497e7030e91 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -154,6 +154,7 @@ struct ivpu_device { struct mutex submitted_jobs_lock; /* Protects submitted_jobs */ struct xarray submitted_jobs_xa; struct ivpu_ipc_consumer job_done_consumer; + atomic_t job_timeout_counter; atomic64_t unique_id_counter; diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 1d0b2bd9d65c..9a3935be1c05 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -39,6 +39,7 @@ struct ivpu_fw_info { u64 read_only_addr; u32 read_only_size; u32 sched_mode; + u64 last_heartbeat; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index 8741c73b92ce..e0d242d9f3e5 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -30,7 +30,7 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n", action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0, (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc, - (bool)bo->base.base.import_attach); + (bool)drm_gem_is_imported(&bo->base.base)); } /* @@ -122,7 +122,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) bo->ctx = NULL; } - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) return; dma_resv_lock(bo->base.base.resv, NULL); @@ -282,7 +282,7 @@ static void ivpu_gem_bo_free(struct drm_gem_object *obj) ivpu_bo_unbind_locked(bo); mutex_destroy(&bo->lock); - drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); + drm_WARN_ON(obj->dev, refcount_read(&bo->base.pages_use_count) > 1); drm_gem_shmem_free(&bo->base); } @@ -362,7 +362,7 @@ ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, if (flags & DRM_IVPU_BO_MAPPABLE) { dma_resv_lock(bo->base.base.resv, NULL); - ret = drm_gem_shmem_vmap(&bo->base, &map); + ret = drm_gem_shmem_vmap_locked(&bo->base, &map); dma_resv_unlock(bo->base.base.resv); if (ret) @@ -387,7 +387,7 @@ void ivpu_bo_free(struct ivpu_bo *bo) if (bo->flags & DRM_IVPU_BO_MAPPABLE) { dma_resv_lock(bo->base.base.resv, NULL); - drm_gem_shmem_vunmap(&bo->base, &map); + drm_gem_shmem_vunmap_locked(&bo->base, &map); dma_resv_unlock(bo->base.base.resv); } @@ -461,7 +461,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) if (bo->mmu_mapped) drm_printf(p, " mmu_mapped"); - if (bo->base.base.import_attach) + if (drm_gem_is_imported(&bo->base.base)) drm_printf(p, " imported"); drm_printf(p, "\n"); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index ac0e22454596..ea30db181cd7 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); #define PM_RESCHEDULE_LIMIT 5 +#define PM_TDR_HEARTBEAT_LIMIT 30 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) { @@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) ivpu_fw_log_reset(vdev); ivpu_fw_load(vdev); fw->entry_point = fw->cold_boot_entry_point; + fw->last_heartbeat = 0; } static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) @@ -189,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work) { struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); struct ivpu_device *vdev = pm->vdev; + u64 heartbeat; + if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) { + ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n"); + goto recovery; + } + + if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) { + ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n"); + goto recovery; + } + + vdev->fw->last_heartbeat = heartbeat; + ivpu_start_job_timeout_detection(vdev); + return; + +recovery: + atomic_set(&vdev->job_timeout_counter, 0); ivpu_pm_trigger_recovery(vdev, "TDR"); } @@ -204,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) { cancel_delayed_work_sync(&vdev->pm->job_timeout_work); + atomic_set(&vdev->job_timeout_counter, 0); } int ivpu_pm_suspend_cb(struct device *dev) diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 43aba57b48f0..1bce1af7c72c 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc struct scatterlist *sg; int ret = 0; - if (obj->import_attach) + if (drm_gem_is_imported(obj)) return -EINVAL; for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) { @@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj) { struct qaic_bo *bo = to_qaic_bo(obj); - if (obj->import_attach) { + if (drm_gem_is_imported(obj)) { /* DMABUF/PRIME Path */ drm_prime_gem_destroy(obj, NULL); } else { @@ -870,7 +870,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, { int ret; - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) ret = qaic_prepare_import_bo(bo, hdr); else ret = qaic_prepare_export_bo(qdev, bo, hdr); @@ -894,7 +894,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) { - if (bo->base.import_attach) + if (drm_gem_is_imported(&bo->base)) qaic_unprepare_import_bo(bo); else qaic_unprepare_export_bo(qdev, bo); diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index ba0cf2f94732..a991b8198dc4 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -240,7 +240,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d mhi_unprepare: mhi_unprepare_from_transfer(mhi_dev); destroy_workqueue: - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); out: return ret; @@ -253,7 +252,6 @@ static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) qdev = dev_get_drvdata(&mhi_dev->dev); mhi_unprepare_from_transfer(qdev->bootlog_ch); - flush_workqueue(qdev->bootlog_wq); destroy_workqueue(qdev->bootlog_wq); qdev->bootlog_ch = NULL; } diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index ef30445527a2..bcc26785175d 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -53,6 +53,7 @@ struct intel_gtt_driver { * of the mmio register file, that's done in the generic code. */ void (*cleanup)(void); void (*write_entry)(dma_addr_t addr, unsigned int entry, unsigned int flags); + dma_addr_t (*read_entry)(unsigned int entry, bool *is_present, bool *is_local); /* Flags is a more or less chipset specific opaque value. * For chipsets that need to support old ums (non-gem) code, this * needs to be identical to the various supported agp memory types! */ @@ -336,6 +337,19 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } +static dma_addr_t i810_read_entry(unsigned int entry, + bool *is_present, bool *is_local) +{ + u32 val; + + val = readl(intel_private.gtt + entry); + + *is_present = val & I810_PTE_VALID; + *is_local = val & I810_PTE_LOCAL; + + return val & ~0xfff; +} + static resource_size_t intel_gtt_stolen_size(void) { u16 gmch_ctrl; @@ -741,6 +755,19 @@ static void i830_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } +static dma_addr_t i830_read_entry(unsigned int entry, + bool *is_present, bool *is_local) +{ + u32 val; + + val = readl(intel_private.gtt + entry); + + *is_present = val & I810_PTE_VALID; + *is_local = false; + + return val & ~0xfff; +} + bool intel_gmch_enable_gtt(void) { u8 __iomem *reg; @@ -878,6 +905,13 @@ void intel_gmch_gtt_insert_sg_entries(struct sg_table *st, } EXPORT_SYMBOL(intel_gmch_gtt_insert_sg_entries); +dma_addr_t intel_gmch_gtt_read_entry(unsigned int pg, + bool *is_present, bool *is_local) +{ + return intel_private.driver->read_entry(pg, is_present, is_local); +} +EXPORT_SYMBOL(intel_gmch_gtt_read_entry); + #if IS_ENABLED(CONFIG_AGP_INTEL) static void intel_gmch_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries, @@ -1126,6 +1160,19 @@ static void i965_write_entry(dma_addr_t addr, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } +static dma_addr_t i965_read_entry(unsigned int entry, + bool *is_present, bool *is_local) +{ + u64 val; + + val = readl(intel_private.gtt + entry); + + *is_present = val & I810_PTE_VALID; + *is_local = false; + + return ((val & 0xf0) << 28) | (val & ~0xfff); +} + static int i9xx_setup(void) { phys_addr_t reg_addr; @@ -1187,6 +1234,7 @@ static const struct intel_gtt_driver i81x_gtt_driver = { .cleanup = i810_cleanup, .check_flags = i830_check_flags, .write_entry = i810_write_entry, + .read_entry = i810_read_entry, }; static const struct intel_gtt_driver i8xx_gtt_driver = { .gen = 2, @@ -1194,6 +1242,7 @@ static const struct intel_gtt_driver i8xx_gtt_driver = { .setup = i830_setup, .cleanup = i830_cleanup, .write_entry = i830_write_entry, + .read_entry = i830_read_entry, .dma_mask_size = 32, .check_flags = i830_check_flags, .chipset_flush = i830_chipset_flush, @@ -1205,6 +1254,7 @@ static const struct intel_gtt_driver i915_gtt_driver = { .cleanup = i9xx_cleanup, /* i945 is the last gpu to need phys mem (for overlay and cursors). */ .write_entry = i830_write_entry, + .read_entry = i830_read_entry, .dma_mask_size = 32, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, @@ -1215,6 +1265,7 @@ static const struct intel_gtt_driver g33_gtt_driver = { .setup = i9xx_setup, .cleanup = i9xx_cleanup, .write_entry = i965_write_entry, + .read_entry = i965_read_entry, .dma_mask_size = 36, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, @@ -1225,6 +1276,7 @@ static const struct intel_gtt_driver pineview_gtt_driver = { .setup = i9xx_setup, .cleanup = i9xx_cleanup, .write_entry = i965_write_entry, + .read_entry = i965_read_entry, .dma_mask_size = 36, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, @@ -1235,6 +1287,7 @@ static const struct intel_gtt_driver i965_gtt_driver = { .setup = i9xx_setup, .cleanup = i9xx_cleanup, .write_entry = i965_write_entry, + .read_entry = i965_read_entry, .dma_mask_size = 36, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, @@ -1244,6 +1297,7 @@ static const struct intel_gtt_driver g4x_gtt_driver = { .setup = i9xx_setup, .cleanup = i9xx_cleanup, .write_entry = i965_write_entry, + .read_entry = i965_read_entry, .dma_mask_size = 36, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, @@ -1254,6 +1308,7 @@ static const struct intel_gtt_driver ironlake_gtt_driver = { .setup = i9xx_setup, .cleanup = i9xx_cleanup, .write_entry = i965_write_entry, + .read_entry = i965_read_entry, .dma_mask_size = 36, .check_flags = i830_check_flags, .chipset_flush = i9xx_chipset_flush, diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 5baa83b85515..0c48d41dd5eb 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -636,10 +636,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) || !exp_info->ops->release)) return ERR_PTR(-EINVAL); - if (WARN_ON(exp_info->ops->cache_sgt_mapping && - (exp_info->ops->pin || exp_info->ops->unpin))) - return ERR_PTR(-EINVAL); - if (WARN_ON(!exp_info->ops->pin != !exp_info->ops->unpin)) return ERR_PTR(-EINVAL); @@ -782,7 +778,7 @@ static void mangle_sg_table(struct sg_table *sg_table) /* To catch abuse of the underlying struct page by importers mix * up the bits, but take care to preserve the low SG_ bits to - * not corrupt the sgt. The mixing is undone in __unmap_dma_buf + * not corrupt the sgt. The mixing is undone on unmap * before passing the sgt back to the exporter. */ for_each_sgtable_sg(sg_table, sg, i) @@ -790,29 +786,19 @@ static void mangle_sg_table(struct sg_table *sg_table) #endif } -static struct sg_table *__map_dma_buf(struct dma_buf_attachment *attach, - enum dma_data_direction direction) + +static inline bool +dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) { - struct sg_table *sg_table; - signed long ret; + return !!attach->importer_ops; +} - sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); - if (IS_ERR_OR_NULL(sg_table)) - return sg_table; - - if (!dma_buf_attachment_is_dynamic(attach)) { - ret = dma_resv_wait_timeout(attach->dmabuf->resv, - DMA_RESV_USAGE_KERNEL, true, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) { - attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, - direction); - return ERR_PTR(ret); - } - } - - mangle_sg_table(sg_table); - return sg_table; +static bool +dma_buf_pin_on_map(struct dma_buf_attachment *attach) +{ + return attach->dmabuf->ops->pin && + (!dma_buf_attachment_is_dynamic(attach) || + !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)); } /** @@ -935,48 +921,11 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, list_add(&attach->node, &dmabuf->attachments); dma_resv_unlock(dmabuf->resv); - /* When either the importer or the exporter can't handle dynamic - * mappings we cache the mapping here to avoid issues with the - * reservation object lock. - */ - if (dma_buf_attachment_is_dynamic(attach) != - dma_buf_is_dynamic(dmabuf)) { - struct sg_table *sgt; - - dma_resv_lock(attach->dmabuf->resv, NULL); - if (dma_buf_is_dynamic(attach->dmabuf)) { - ret = dmabuf->ops->pin(attach); - if (ret) - goto err_unlock; - } - - sgt = __map_dma_buf(attach, DMA_BIDIRECTIONAL); - if (!sgt) - sgt = ERR_PTR(-ENOMEM); - if (IS_ERR(sgt)) { - ret = PTR_ERR(sgt); - goto err_unpin; - } - dma_resv_unlock(attach->dmabuf->resv); - attach->sgt = sgt; - attach->dir = DMA_BIDIRECTIONAL; - } - return attach; err_attach: kfree(attach); return ERR_PTR(ret); - -err_unpin: - if (dma_buf_is_dynamic(attach->dmabuf)) - dmabuf->ops->unpin(attach); - -err_unlock: - dma_resv_unlock(attach->dmabuf->resv); - - dma_buf_detach(dmabuf, attach); - return ERR_PTR(ret); } EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, "DMA_BUF"); @@ -995,16 +944,6 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, } EXPORT_SYMBOL_NS_GPL(dma_buf_attach, "DMA_BUF"); -static void __unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *sg_table, - enum dma_data_direction direction) -{ - /* uses XOR, hence this unmangles */ - mangle_sg_table(sg_table); - - attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); -} - /** * dma_buf_detach - Remove the given attachment from dmabuf's attachments list * @dmabuf: [in] buffer to detach from. @@ -1020,16 +959,7 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) return; dma_resv_lock(dmabuf->resv, NULL); - - if (attach->sgt) { - - __unmap_dma_buf(attach, attach->sgt, attach->dir); - - if (dma_buf_is_dynamic(attach->dmabuf)) - dmabuf->ops->unpin(attach); - } list_del(&attach->node); - dma_resv_unlock(dmabuf->resv); if (dmabuf->ops->detach) @@ -1058,7 +988,7 @@ int dma_buf_pin(struct dma_buf_attachment *attach) struct dma_buf *dmabuf = attach->dmabuf; int ret = 0; - WARN_ON(!dma_buf_attachment_is_dynamic(attach)); + WARN_ON(!attach->importer_ops); dma_resv_assert_held(dmabuf->resv); @@ -1081,7 +1011,7 @@ void dma_buf_unpin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; - WARN_ON(!dma_buf_attachment_is_dynamic(attach)); + WARN_ON(!attach->importer_ops); dma_resv_assert_held(dmabuf->resv); @@ -1115,7 +1045,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, enum dma_data_direction direction) { struct sg_table *sg_table; - int r; + signed long ret; might_sleep(); @@ -1124,41 +1054,37 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, dma_resv_assert_held(attach->dmabuf->resv); - if (attach->sgt) { + if (dma_buf_pin_on_map(attach)) { + ret = attach->dmabuf->ops->pin(attach); /* - * Two mappings with different directions for the same - * attachment are not allowed. + * Catch exporters making buffers inaccessible even when + * attachments preventing that exist. */ - if (attach->dir != direction && - attach->dir != DMA_BIDIRECTIONAL) - return ERR_PTR(-EBUSY); - - return attach->sgt; + WARN_ON_ONCE(ret == EBUSY); + if (ret) + return ERR_PTR(ret); } - if (dma_buf_is_dynamic(attach->dmabuf)) { - if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { - r = attach->dmabuf->ops->pin(attach); - if (r) - return ERR_PTR(r); - } - } - - sg_table = __map_dma_buf(attach, direction); + sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); if (!sg_table) sg_table = ERR_PTR(-ENOMEM); + if (IS_ERR(sg_table)) + goto error_unpin; - if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) && - !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) - attach->dmabuf->ops->unpin(attach); - - if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) { - attach->sgt = sg_table; - attach->dir = direction; + /* + * Importers with static attachments don't wait for fences. + */ + if (!dma_buf_attachment_is_dynamic(attach)) { + ret = dma_resv_wait_timeout(attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + goto error_unmap; } + mangle_sg_table(sg_table); #ifdef CONFIG_DMA_API_DEBUG - if (!IS_ERR(sg_table)) { + { struct scatterlist *sg; u64 addr; int len; @@ -1175,6 +1101,16 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, } #endif /* CONFIG_DMA_API_DEBUG */ return sg_table; + +error_unmap: + attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); + sg_table = ERR_PTR(ret); + +error_unpin: + if (dma_buf_pin_on_map(attach)) + attach->dmabuf->ops->unpin(attach); + + return sg_table; } EXPORT_SYMBOL_NS_GPL(dma_buf_map_attachment, "DMA_BUF"); @@ -1227,14 +1163,11 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach, dma_resv_assert_held(attach->dmabuf->resv); - if (attach->sgt == sg_table) - return; + mangle_sg_table(sg_table); + attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction); - __unmap_dma_buf(attach, sg_table, direction); - - if (dma_buf_is_dynamic(attach->dmabuf) && - !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) - dma_buf_unpin(attach); + if (dma_buf_pin_on_map(attach)) + attach->dmabuf->ops->unpin(attach); } EXPORT_SYMBOL_NS_GPL(dma_buf_unmap_attachment, "DMA_BUF"); diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 2a059ac0ed27..a495d8a6c2e3 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -79,6 +79,41 @@ static int fence_cmp(const void *_a, const void *_b) return 0; } +/** + * dma_fence_dedup_array - Sort and deduplicate an array of dma_fence pointers + * @fences: Array of dma_fence pointers to be deduplicated + * @num_fences: Number of entries in the @fences array + * + * Sorts the input array by context, then removes duplicate + * fences with the same context, keeping only the most recent one. + * + * The array is modified in-place and unreferenced duplicate fences are released + * via dma_fence_put(). The function returns the new number of fences after + * deduplication. + * + * Return: Number of unique fences remaining in the array. + */ +int dma_fence_dedup_array(struct dma_fence **fences, int num_fences) +{ + int i, j; + + sort(fences, num_fences, sizeof(*fences), fence_cmp, NULL); + + /* + * Only keep the most recent fence for each context. + */ + j = 0; + for (i = 1; i < num_fences; i++) { + if (fences[i]->context == fences[j]->context) + dma_fence_put(fences[i]); + else + fences[++j] = fences[i]; + } + + return ++j; +} +EXPORT_SYMBOL_GPL(dma_fence_dedup_array); + /* Implementation for the dma_fence_merge() marco, don't use directly */ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, @@ -87,7 +122,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence *tmp, *unsignaled = NULL, **array; struct dma_fence_array *result; ktime_t timestamp; - int i, j, count; + int i, count; count = 0; timestamp = ns_to_ktime(0); @@ -141,19 +176,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (count == 0 || count == 1) goto return_fastpath; - sort(array, count, sizeof(*array), fence_cmp, NULL); - - /* - * Only keep the most recent fence for each context. - */ - j = 0; - for (i = 1; i < count; i++) { - if (array[i]->context == array[j]->context) - dma_fence_put(array[i]); - else - array[++j] = array[i]; - } - count = ++j; + count = dma_fence_dedup_array(array, count); if (count > 1) { result = dma_fence_array_create(count, array, diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 26d5dc89ea16..82b1b714300d 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -21,8 +21,6 @@ #include #include -static struct dma_heap *sys_heap; - struct system_heap_buffer { struct dma_heap *heap; struct list_head attachments; @@ -424,6 +422,7 @@ static const struct dma_heap_ops system_heap_ops = { static int __init system_heap_create(void) { struct dma_heap_export_info exp_info; + struct dma_heap *sys_heap; exp_info.name = "system"; exp_info.ops = &system_heap_ops; diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 22a808995f10..4f27ee93a00c 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -173,20 +173,6 @@ static bool timeline_fence_signaled(struct dma_fence *fence) return !__dma_fence_is_later(fence->seqno, parent->value, fence->ops); } -static void timeline_fence_value_str(struct dma_fence *fence, - char *str, int size) -{ - snprintf(str, size, "%lld", fence->seqno); -} - -static void timeline_fence_timeline_value_str(struct dma_fence *fence, - char *str, int size) -{ - struct sync_timeline *parent = dma_fence_parent(fence); - - snprintf(str, size, "%d", parent->value); -} - static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); @@ -208,8 +194,6 @@ static const struct dma_fence_ops timeline_fence_ops = { .get_timeline_name = timeline_fence_get_timeline_name, .signaled = timeline_fence_signaled, .release = timeline_fence_release, - .fence_value_str = timeline_fence_value_str, - .timeline_value_str = timeline_fence_timeline_value_str, .set_deadline = timeline_fence_set_deadline, }; diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 237bce21d1e7..67cd69551e42 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -12,8 +12,6 @@ static struct dentry *dbgfs; static LIST_HEAD(sync_timeline_list_head); static DEFINE_SPINLOCK(sync_timeline_list_lock); -static LIST_HEAD(sync_file_list_head); -static DEFINE_SPINLOCK(sync_file_list_lock); void sync_timeline_debug_add(struct sync_timeline *obj) { @@ -33,24 +31,6 @@ void sync_timeline_debug_remove(struct sync_timeline *obj) spin_unlock_irqrestore(&sync_timeline_list_lock, flags); } -void sync_file_debug_add(struct sync_file *sync_file) -{ - unsigned long flags; - - spin_lock_irqsave(&sync_file_list_lock, flags); - list_add_tail(&sync_file->sync_file_list, &sync_file_list_head); - spin_unlock_irqrestore(&sync_file_list_lock, flags); -} - -void sync_file_debug_remove(struct sync_file *sync_file) -{ - unsigned long flags; - - spin_lock_irqsave(&sync_file_list_lock, flags); - list_del(&sync_file->sync_file_list); - spin_unlock_irqrestore(&sync_file_list_lock, flags); -} - static const char *sync_status_str(int status) { if (status < 0) @@ -82,25 +62,8 @@ static void sync_print_fence(struct seq_file *s, seq_printf(s, "@%lld.%09ld", (s64)ts64.tv_sec, ts64.tv_nsec); } - if (fence->ops->timeline_value_str && - fence->ops->fence_value_str) { - char value[64]; - bool success; - - fence->ops->fence_value_str(fence, value, sizeof(value)); - success = strlen(value); - - if (success) { - seq_printf(s, ": %s", value); - - fence->ops->timeline_value_str(fence, value, - sizeof(value)); - - if (strlen(value)) - seq_printf(s, " / %s", value); - } - } - + seq_printf(s, ": %lld", fence->seqno); + seq_printf(s, " / %d", parent->value); seq_putc(s, '\n'); } @@ -118,26 +81,6 @@ static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) spin_unlock(&obj->lock); } -static void sync_print_sync_file(struct seq_file *s, - struct sync_file *sync_file) -{ - char buf[128]; - int i; - - seq_printf(s, "[%p] %s: %s\n", sync_file, - sync_file_get_name(sync_file, buf, sizeof(buf)), - sync_status_str(dma_fence_get_status(sync_file->fence))); - - if (dma_fence_is_array(sync_file->fence)) { - struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); - - for (i = 0; i < array->num_fences; ++i) - sync_print_fence(s, array->fences[i], true); - } else { - sync_print_fence(s, sync_file->fence, true); - } -} - static int sync_info_debugfs_show(struct seq_file *s, void *unused) { struct list_head *pos; @@ -157,15 +100,6 @@ static int sync_info_debugfs_show(struct seq_file *s, void *unused) seq_puts(s, "fences:\n--------------\n"); - spin_lock_irq(&sync_file_list_lock); - list_for_each(pos, &sync_file_list_head) { - struct sync_file *sync_file = - container_of(pos, struct sync_file, sync_file_list); - - sync_print_sync_file(s, sync_file); - seq_putc(s, '\n'); - } - spin_unlock_irq(&sync_file_list_lock); return 0; } diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index a1bdd62efccd..02af347293d0 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -68,7 +68,5 @@ extern const struct file_operations sw_sync_debugfs_fops; void sync_timeline_debug_add(struct sync_timeline *obj); void sync_timeline_debug_remove(struct sync_timeline *obj); -void sync_file_debug_add(struct sync_file *fence); -void sync_file_debug_remove(struct sync_file *fence); #endif /* _LINUX_SYNC_H */ diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index e74e36a8ecda..7eee3eb47a8e 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -285,7 +285,6 @@ static int end_cpu_udmabuf(struct dma_buf *buf, } static const struct dma_buf_ops udmabuf_ops = { - .cache_sgt_mapping = true, .map_dma_buf = map_udmabuf, .unmap_dma_buf = unmap_udmabuf, .release = release_udmabuf, diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 7309394b8fc9..e57bff702b5f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -558,6 +558,7 @@ int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) __weak __alias(__efi_mem_desc_lookup); +EXPORT_SYMBOL_GPL(efi_mem_desc_lookup); /* * Calculate the highest address of an efi memory descriptor. diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index 75a186bf8f8e..592d8a644619 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -35,36 +35,7 @@ __init bool sysfb_parse_mode(const struct screen_info *si, if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI) return false; - /* - * The meaning of depth and bpp for direct-color formats is - * inconsistent: - * - * - DRM format info specifies depth as the number of color - * bits; including alpha, but not including filler bits. - * - Linux' EFI platform code computes lfb_depth from the - * individual color channels, including the reserved bits. - * - VBE 1.1 defines lfb_depth for XRGB1555 as 16, but later - * versions use 15. - * - On the kernel command line, 'bpp' of 32 is usually - * XRGB8888 including the filler bits, but 15 is XRGB1555 - * not including the filler bit. - * - * It's not easily possible to fix this in struct screen_info, - * as this could break UAPI. The best solution is to compute - * bits_per_pixel from the color bits, reserved bits and - * reported lfb_depth, whichever is highest. In the loop below, - * ignore simplefb formats with alpha bits, as EFI and VESA - * don't specify alpha channels. - */ - if (si->lfb_depth > 8) { - bits_per_pixel = max(max3(si->red_size + si->red_pos, - si->green_size + si->green_pos, - si->blue_size + si->blue_pos), - si->rsvd_size + si->rsvd_pos); - bits_per_pixel = max_t(u32, bits_per_pixel, si->lfb_depth); - } else { - bits_per_pixel = si->lfb_depth; - } + bits_per_pixel = __screen_info_lfb_bits_per_pixel(si); for (i = 0; i < ARRAY_SIZE(formats); ++i) { const struct simplefb_format *f = &formats[i]; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f01925ed8176..f094797f3b2b 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -26,6 +26,11 @@ menuconfig DRM details. You should also select and configure AGP (/dev/agpgart) support if it is available for your platform. +menu "DRM debugging options" +depends on DRM +source "drivers/gpu/drm/Kconfig.debug" +endmenu + if DRM config DRM_MIPI_DBI @@ -37,65 +42,6 @@ config DRM_MIPI_DSI bool depends on DRM -config DRM_DEBUG_MM - bool "Insert extra checks and debug info into the DRM range managers" - default n - depends on DRM - depends on STACKTRACE_SUPPORT - select STACKDEPOT - help - Enable allocation tracking of memory manager and leak detection on - shutdown. - - Recommended for driver developers only. - - If in doubt, say "N". - -config DRM_USE_DYNAMIC_DEBUG - bool "use dynamic debug to implement drm.debug" - default n - depends on BROKEN - depends on DRM - depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE - depends on JUMP_LABEL - help - Use dynamic-debug to avoid drm_debug_enabled() runtime overheads. - Due to callsite counts in DRM drivers (~4k in amdgpu) and 56 - bytes per callsite, the .data costs can be substantial, and - are therefore configurable. - -config DRM_KUNIT_TEST_HELPERS - tristate - depends on DRM && KUNIT - select DRM_KMS_HELPER - help - KUnit Helpers for KMS drivers. - -config DRM_KUNIT_TEST - tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS - depends on DRM && KUNIT && MMU - select DRM_BUDDY - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDMI_STATE_HELPER - select DRM_DISPLAY_HELPER - select DRM_EXEC - select DRM_EXPORT_FOR_TESTS if m - select DRM_GEM_SHMEM_HELPER - select DRM_KUNIT_TEST_HELPERS - select DRM_LIB_RANDOM - select PRIME_NUMBERS - default KUNIT_ALL_TESTS - help - This builds unit tests for DRM. This option is not useful for - distributions or general kernels, but only for kernel - developers working on DRM and associated drivers. - - For more information on KUnit and unit tests in general, - please refer to the KUnit documentation in - Documentation/dev-tools/kunit/. - - If in doubt, say "N". - config DRM_KMS_HELPER tristate depends on DRM @@ -247,23 +193,6 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. -config DRM_TTM_KUNIT_TEST - tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS - default n - depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) - select DRM_TTM - select DRM_BUDDY - select DRM_EXPORT_FOR_TESTS if m - select DRM_KUNIT_TEST_HELPERS - default KUNIT_ALL_TESTS - help - Enables unit tests for TTM, a GPU memory manager subsystem used - to manage memory buffers. This option is mostly useful for kernel - developers. It depends on (UML || COMPILE_TEST) since no other driver - which uses TTM can be loaded while running the tests. - - If in doubt, say "N". - config DRM_EXEC tristate depends on DRM @@ -335,6 +264,8 @@ config DRM_SCHED tristate depends on DRM +source "drivers/gpu/drm/sysfb/Kconfig" + source "drivers/gpu/drm/arm/Kconfig" source "drivers/gpu/drm/radeon/Kconfig" @@ -343,6 +274,8 @@ source "drivers/gpu/drm/amd/amdgpu/Kconfig" source "drivers/gpu/drm/nouveau/Kconfig" +source "drivers/gpu/drm/nova/Kconfig" + source "drivers/gpu/drm/i915/Kconfig" source "drivers/gpu/drm/xe/Kconfig" @@ -454,6 +387,8 @@ source "drivers/gpu/drm/xlnx/Kconfig" source "drivers/gpu/drm/gud/Kconfig" +source "drivers/gpu/drm/sitronix/Kconfig" + source "drivers/gpu/drm/solomon/Kconfig" source "drivers/gpu/drm/sprd/Kconfig" @@ -474,9 +409,6 @@ config DRM_HYPERV If M is selected the module will be called hyperv_drm. -config DRM_EXPORT_FOR_TESTS - bool - # Separate option as not all DRM drivers use it config DRM_PANEL_BACKLIGHT_QUIRKS tristate @@ -489,31 +421,6 @@ config DRM_PRIVACY_SCREEN bool default n -config DRM_WERROR - bool "Compile the drm subsystem with warnings as errors" - depends on DRM && EXPERT - depends on !WERROR - default n - help - A kernel build should not cause any compiler warnings, and this - enables the '-Werror' flag to enforce that rule in the drm subsystem. - - The drm subsystem enables more warnings than the kernel default, so - this config option is disabled by default. - - If in doubt, say N. - -config DRM_HEADER_TEST - bool "Ensure DRM headers are self-contained and pass kernel-doc" - depends on DRM && EXPERT && BROKEN - default n - help - Ensure the DRM subsystem headers both under drivers/gpu/drm and - include/drm compile, are self-contained, have header guards, and have - no kernel-doc warnings. - - If in doubt, say N. - endif # Separate option because drm_panel_orientation_quirks.c is shared with fbdev diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug new file mode 100644 index 000000000000..fa6ee76f4d3c --- /dev/null +++ b/drivers/gpu/drm/Kconfig.debug @@ -0,0 +1,116 @@ +config DRM_USE_DYNAMIC_DEBUG + bool "use dynamic debug to implement drm.debug" + default n + depends on BROKEN + depends on DRM + depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE + depends on JUMP_LABEL + help + Use dynamic-debug to avoid drm_debug_enabled() runtime overheads. + Due to callsite counts in DRM drivers (~4k in amdgpu) and 56 + bytes per callsite, the .data costs can be substantial, and + are therefore configurable. + +config DRM_WERROR + bool "Compile the drm subsystem with warnings as errors" + depends on DRM && EXPERT + depends on !WERROR + default n + help + A kernel build should not cause any compiler warnings, and this + enables the '-Werror' flag to enforce that rule in the drm subsystem. + + The drm subsystem enables more warnings than the kernel default, so + this config option is disabled by default. + + If in doubt, say N. + +config DRM_HEADER_TEST + bool "Ensure DRM headers are self-contained and pass kernel-doc" + depends on DRM && EXPERT && BROKEN + default n + help + Ensure the DRM subsystem headers both under drivers/gpu/drm and + include/drm compile, are self-contained, have header guards, and have + no kernel-doc warnings. + + If in doubt, say N. + +config DRM_DEBUG_MM + bool "Insert extra checks and debug info into the DRM range managers" + default n + depends on DRM + depends on STACKTRACE_SUPPORT + select STACKDEPOT + help + Enable allocation tracking of memory manager and leak detection on + shutdown. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_KUNIT_TEST_HELPERS + tristate + depends on DRM && KUNIT + select DRM_KMS_HELPER + help + KUnit Helpers for KMS drivers. + +config DRM_KUNIT_TEST + tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS + depends on DRM && KUNIT && MMU + select DRM_BRIDGE_CONNECTOR + select DRM_BUDDY + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDMI_STATE_HELPER + select DRM_DISPLAY_HELPER + select DRM_EXEC + select DRM_EXPORT_FOR_TESTS if m + select DRM_GEM_SHMEM_HELPER + select DRM_KUNIT_TEST_HELPERS + select DRM_LIB_RANDOM + select PRIME_NUMBERS + default KUNIT_ALL_TESTS + help + This builds unit tests for DRM. This option is not useful for + distributions or general kernels, but only for kernel + developers working on DRM and associated drivers. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in + Documentation/dev-tools/kunit/. + + If in doubt, say "N". + +config DRM_TTM_KUNIT_TEST + tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS + default n + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) + select DRM_TTM + select DRM_BUDDY + select DRM_EXPORT_FOR_TESTS if m + select DRM_KUNIT_TEST_HELPERS + default KUNIT_ALL_TESTS + help + Enables unit tests for TTM, a GPU memory manager subsystem used + to manage memory buffers. This option is mostly useful for kernel + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. + + If in doubt, say "N". + +config DRM_SCHED_KUNIT_TEST + tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS + select DRM_SCHED + depends on DRM && KUNIT + default KUNIT_ALL_TESTS + help + Choose this option to build unit tests for the DRM scheduler. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_EXPORT_FOR_TESTS + bool diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index d21d0cd2c752..5050ac32bba2 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -134,6 +134,7 @@ obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o drm_kms_helper-y := \ drm_atomic_helper.o \ drm_atomic_state_helper.o \ + drm_bridge_helper.o \ drm_crtc_helper.o \ drm_damage_helper.o \ drm_flip_work.o \ @@ -176,6 +177,7 @@ obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/ obj-$(CONFIG_DRM_VGEM) += vgem/ obj-$(CONFIG_DRM_VKMS) += vkms/ obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/ +obj-$(CONFIG_DRM_NOVA) += nova/ obj-$(CONFIG_DRM_EXYNOS) +=exynos/ obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ obj-$(CONFIG_DRM_GMA500) += gma500/ @@ -204,6 +206,7 @@ obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/ obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/ obj-y += hisilicon/ obj-y += mxsfb/ +obj-y += sysfb/ obj-y += tiny/ obj-$(CONFIG_DRM_PL111) += pl111/ obj-$(CONFIG_DRM_TVE200) += tve200/ @@ -219,6 +222,7 @@ obj-$(CONFIG_DRM_TIDSS) += tidss/ obj-y += xlnx/ obj-y += gud/ obj-$(CONFIG_DRM_HYPERV) += hyperv/ +obj-y += sitronix/ obj-y += solomon/ obj-$(CONFIG_DRM_SPRD) += sprd/ obj-$(CONFIG_DRM_LOONGSON) += loongson/ diff --git a/drivers/gpu/drm/adp/adp-mipi.c b/drivers/gpu/drm/adp/adp-mipi.c index ad80542b60ed..2b60128e2c69 100644 --- a/drivers/gpu/drm/adp/adp-mipi.c +++ b/drivers/gpu/drm/adp/adp-mipi.c @@ -212,12 +212,13 @@ static const struct mipi_dsi_host_ops adp_dsi_host_ops = { }; static int adp_dsi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct adp_mipi_drv_private *adp = container_of(bridge, struct adp_mipi_drv_private, bridge); - return drm_bridge_attach(bridge->encoder, adp->next_bridge, bridge, flags); + return drm_bridge_attach(encoder, adp->next_bridge, bridge, flags); } static const struct drm_bridge_funcs adp_dsi_bridge_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index aacc810cabb3..87080c06e5fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -174,7 +174,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_mes.o \ mes_v11_0.o \ - mes_v12_0.o + mes_v12_0.o \ + +# add GFX userqueue support +amdgpu-y += mes_userqueue.o # add UVD block amdgpu-y += \ @@ -253,6 +256,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o +# add gfx usermode queue +amdgpu-y += amdgpu_userq.o ifneq ($(CONFIG_HSA_AMD),) AMDKFD_PATH := ../amdkfd diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c3641331d4de..836ea081088a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -113,6 +113,8 @@ #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" +#include "amdgpu_userq.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -228,7 +230,7 @@ extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; extern int amdgpu_mtype_local; -extern bool enforce_isolation; +extern int amdgpu_enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -266,8 +268,10 @@ extern int amdgpu_umsch_mm_fwlog; extern int amdgpu_user_partt_mode; extern int amdgpu_agp; +extern int amdgpu_rebar; extern int amdgpu_wbrf; +extern int amdgpu_user_queue; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -488,7 +492,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -501,6 +504,11 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; @@ -512,12 +520,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); */ #define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ +/** + * amdgpu_wb - This struct is used for small GPU memory allocation. + * + * This struct is used to allocate a small amount of GPU memory that can be + * used to shadow certain states into the memory. This is especially useful for + * providing easy CPU access to some states without requiring register access + * (e.g., if some block is power gated, reading register may be problematic). + * + * Note: the term writeback was initially used because many of the amdgpu + * components had some level of writeback memory, and this struct initially + * described those components. + */ struct amdgpu_wb { + + /** + * @wb_obj: + * + * Buffer Object used for the writeback memory. + */ struct amdgpu_bo *wb_obj; + + /** + * @wb: + * + * Pointer to the first writeback slot. In terms of CPU address + * this value can be accessed directly by using the offset as an index. + * For the GPU address, it is necessary to use gpu_addr and the offset. + */ volatile uint32_t *wb; + + /** + * @gpu_addr: + * + * Writeback base address in the GPU. + */ uint64_t gpu_addr; - u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */ + + /** + * @num_wb: + * + * Number of writeback slots reserved for amdgpu. + */ + u32 num_wb; + + /** + * @used: + * + * Track the writeback slot already used. + */ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; + + /** + * @lock: + * + * Protects read and write of the used field array. + */ spinlock_t lock; }; @@ -551,6 +609,7 @@ struct amdgpu_allowed_register_entry { * are reset depends on the ASIC. Notably doesn't reset IPs * shared with the CPU on APUs or the memory controllers (so * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card * but without powering off the PCI bus. Suitable only for * discrete GPUs. @@ -568,6 +627,7 @@ enum amd_reset_method { AMD_RESET_METHOD_MODE0, AMD_RESET_METHOD_MODE1, AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_LINK, AMD_RESET_METHOD_BACO, AMD_RESET_METHOD_PCI, AMD_RESET_METHOD_ON_INIT, @@ -821,6 +881,11 @@ struct amdgpu_mqd_prop { uint32_t hqd_queue_priority; bool allow_tunneling; bool hqd_active; + uint64_t shadow_addr; + uint64_t gds_bkup_addr; + uint64_t csa_addr; + uint64_t fence_address; + bool tmz_queue; }; struct amdgpu_mqd { @@ -829,6 +894,12 @@ struct amdgpu_mqd { struct amdgpu_mqd_prop *p); }; +struct amdgpu_pcie_reset_ctx { + bool in_link_reset; + bool occurs_dpc; + bool audio_suspended; +}; + /* * Custom Init levels could be defined for different situations where a full * initialization of all hardware blocks are not expected. Sample cases are @@ -853,6 +924,14 @@ struct amdgpu_init_level { struct amdgpu_reset_domain; struct amdgpu_fru_info; +enum amdgpu_enforce_isolation_mode { + AMDGPU_ENFORCE_ISOLATION_DISABLE = 0, + AMDGPU_ENFORCE_ISOLATION_ENABLE = 1, + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2, + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3, +}; + + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1081,6 +1160,13 @@ struct amdgpu_device { bool enable_uni_mes; struct amdgpu_mes mes; struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM]; + const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM]; + + /* xarray used to retrieve the user queue fence driver reference + * in the EOP interrupt handler to signal the particular user + * queue fence. + */ + struct xarray userq_xa; /* df */ struct amdgpu_df df; @@ -1160,6 +1246,8 @@ struct amdgpu_device { struct pci_saved_state *pci_state; pci_channel_state_t pci_channel_state; + struct amdgpu_pcie_reset_ctx pcie_reset_ctx; + /* Track auto wait count on s_barrier settings */ bool barrier_has_auto_waitcnt; @@ -1193,10 +1281,11 @@ struct amdgpu_device { bool debug_enable_ras_aca; bool debug_exp_resets; bool debug_disable_gpu_ring_reset; + bool debug_vm_userptr; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; - bool enforce_isolation[MAX_XCP]; + enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP]; struct amdgpu_isolation { void *owner; struct dma_fence *spearhead; @@ -1210,6 +1299,10 @@ struct amdgpu_device { * in KFD: VRAM or GTT. */ bool apu_prefer_gtt; + + struct list_head userq_mgr_list; + struct mutex userq_mutex; + bool userq_halt_for_enforce_isolation; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1464,6 +1557,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int amdgpu_device_mode1_reset(struct amdgpu_device *adev); +int amdgpu_device_link_reset(struct amdgpu_device *adev); bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index b4ad163f42a7..3835f2592914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -120,6 +120,9 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st for (i = 0; i < ARRAY_SIZE(aca_regs); i++) RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); + + if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS])) + RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n"); } static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d2ec4130a316..260165bbe373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (ret != -EFAULT) return ret; + /* If applications unmap memory before destroying the userptr + * from the KFD, trigger a segmentation fault in VM debug mode. + */ + if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", + pid_nr(process_info->pid), mem->va); + + // Send GPU VM fault to user space + kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), + mem->va); + } + ret = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index eb015bdda8a7..c7d32fb216e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -281,6 +281,9 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, case ATOM_DGPU_VRAM_TYPE_GDDR6: vram_type = AMDGPU_VRAM_TYPE_GDDR6; break; + case ATOM_DGPU_VRAM_TYPE_HBM3E: + vram_type = AMDGPU_VRAM_TYPE_HBM3E; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 68bce6a6d09d..004a6a9d6b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -252,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { - case CHIP_TAHITI: - strcpy(fw_name, "radeon/tahiti_smc.bin"); - break; - case CHIP_PITCAIRN: - if ((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6810) || - (adev->pdev->device == 0x6811))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/pitcairn_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/pitcairn_smc.bin"); - } - break; - case CHIP_VERDE: - if (((adev->pdev->device == 0x6820) && - ((adev->pdev->revision == 0x81) || - (adev->pdev->revision == 0x83))) || - ((adev->pdev->device == 0x6821) && - ((adev->pdev->revision == 0x83) || - (adev->pdev->revision == 0x87))) || - ((adev->pdev->revision == 0x87) && - ((adev->pdev->device == 0x6823) || - (adev->pdev->device == 0x682b)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/verde_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/verde_smc.bin"); - } - break; - case CHIP_OLAND: - if (((adev->pdev->revision == 0x81) && - ((adev->pdev->device == 0x6600) || - (adev->pdev->device == 0x6604) || - (adev->pdev->device == 0x6605) || - (adev->pdev->device == 0x6610))) || - ((adev->pdev->revision == 0x83) && - (adev->pdev->device == 0x6610))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/oland_k_smc.bin"); - } else { - strcpy(fw_name, "radeon/oland_smc.bin"); - } - break; - case CHIP_HAINAN: - if (((adev->pdev->revision == 0x81) && - (adev->pdev->device == 0x6660)) || - ((adev->pdev->revision == 0x83) && - ((adev->pdev->device == 0x6660) || - (adev->pdev->device == 0x6663) || - (adev->pdev->device == 0x6665) || - (adev->pdev->device == 0x6667)))) { - info->is_kicker = true; - strcpy(fw_name, "radeon/hainan_k_smc.bin"); - } else if ((adev->pdev->revision == 0xc3) && - (adev->pdev->device == 0x6665)) { - info->is_kicker = true; - strcpy(fw_name, "radeon/banks_k_2_smc.bin"); - } else { - strcpy(fw_name, "radeon/hainan_smc.bin"); - } - break; case CHIP_BONAIRE: if ((adev->pdev->revision == 0x80) || (adev->pdev->revision == 0x81) || (adev->pdev->device == 0x665f)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/bonaire_k_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/bonaire_smc.bin"); + strscpy(fw_name, "amdgpu/bonaire_smc.bin"); } break; case CHIP_HAWAII: if (adev->pdev->revision == 0x80) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/hawaii_k_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/hawaii_smc.bin"); + strscpy(fw_name, "amdgpu/hawaii_smc.bin"); } break; case CHIP_TOPAZ: @@ -338,76 +277,76 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/topaz_smc.bin"); + strscpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_k_smc.bin"); } else - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + strscpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: - strcpy(fw_name, "amdgpu/fiji_smc.bin"); + strscpy(fw_name, "amdgpu/fiji_smc.bin"); break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k_smc.bin"); } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k_smc.bin"); } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc.bin"); } } else if (type == CGS_UCODE_ID_SMU_SK) { - strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); + strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; - strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + strscpy(fw_name, "amdgpu/polaris12_smc.bin"); } break; case CHIP_VEGAM: - strcpy(fw_name, "amdgpu/vegam_smc.bin"); + strscpy(fw_name, "amdgpu/vegam_smc.bin"); break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || (adev->pdev->revision == 0xc1) || (adev->pdev->revision == 0xc3))) - strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_acg_smc.bin"); else - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + strscpy(fw_name, "amdgpu/vega10_smc.bin"); break; case CHIP_VEGA12: - strcpy(fw_name, "amdgpu/vega12_smc.bin"); + strscpy(fw_name, "amdgpu/vega12_smc.bin"); break; case CHIP_VEGA20: - strcpy(fw_name, "amdgpu/vega20_smc.bin"); + strscpy(fw_name, "amdgpu/vega20_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 360e07a5c7c1..5a234eadae8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -549,7 +549,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) { int r; - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; r = amdgpu_cper_ring_init(adev); @@ -568,7 +568,7 @@ int amdgpu_cper_init(struct amdgpu_device *adev) int amdgpu_cper_fini(struct amdgpu_device *adev) { - if (!amdgpu_aca_is_enabled(adev)) + if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev)) return 0; adev->cper.enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee0..9ea0d9b71f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -296,7 +296,25 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; - p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; + switch (p->adev->enforce_isolation[fpriv->xcp_id]) { + case AMDGPU_ENFORCE_ISOLATION_DISABLE: + default: + p->jobs[i]->enforce_isolation = false; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = true; + break; + case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER: + p->jobs[i]->enforce_isolation = true; + p->jobs[i]->run_cleaner_shader = false; + break; + } } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -349,6 +367,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, ring = amdgpu_job_ring(job); ib = &job->ibs[job->num_ibs++]; + /* submissions to kernel queues are disabled */ + if (ring->no_user_submission) + return -EINVAL; + /* MM engine doesn't support user fences */ if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a1450f13d963..8e626f50b362 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2105,6 +2105,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); + amdgpu_psp_debugfs_init(adev); debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f8b3e04d71ed..4d1b54f58495 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -85,6 +85,7 @@ #if IS_ENABLED(CONFIG_X86) #include +#include #endif MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); @@ -1680,6 +1681,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + if (!amdgpu_rebar) + return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ if ((amdgpu_runtime_pm != 0) && adev->pdev->vendor == PCI_VENDOR_ID_ATI && @@ -1870,6 +1874,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device return true; } +static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) + return false; + + if (c->x86 == 6 && + adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { + switch (c->x86_model) { + case VFM_MODEL(INTEL_ALDERLAKE): + case VFM_MODEL(INTEL_ALDERLAKE_L): + case VFM_MODEL(INTEL_RAPTORLAKE): + case VFM_MODEL(INTEL_RAPTORLAKE_P): + case VFM_MODEL(INTEL_RAPTORLAKE_S): + return true; + default: + return false; + } + } else { + return false; + } +#else + return false; +#endif +} + /** * amdgpu_device_should_use_aspm - check if the device should program ASPM * @@ -1894,7 +1927,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) } if (adev->flags & AMD_IS_APU) return false; - if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + if (amdgpu_device_aspm_support_quirk(adev)) return false; return pcie_aspm_enabled(adev->pdev); } @@ -2112,8 +2145,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - for (i = 0; i < MAX_XCP; i++) - adev->enforce_isolation[i] = !!enforce_isolation; + for (i = 0; i < MAX_XCP; i++) { + switch (amdgpu_enforce_isolation) { + case -1: + case 0: + default: + /* disable */ + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + /* enable */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + /* enable legacy mode */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + /* enable only process isolation without submitting cleaner shader */ + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } return 0; } @@ -2689,6 +2745,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + /* Check for IP version 9.4.3 with A0 hardware */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + !amdgpu_device_get_rev_id(adev)) { + dev_err(adev->dev, "Unsupported A0 hardware\n"); + return -ENODEV; /* device unsupported - no device error */ + } + if (amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && @@ -2701,7 +2764,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -3172,6 +3234,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: return true; @@ -3455,6 +3518,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); amdgpu_amdkfd_suspend(adev, false); + amdgpu_userq_suspend(adev); /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -4307,9 +4371,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_sync_create(&adev->isolation[i].active); amdgpu_sync_create(&adev->isolation[i].prev); } - mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.userq_sch_mutex); mutex_init(&adev->gfx.workload_profile_mutex); mutex_init(&adev->vcn.workload_profile_mutex); + mutex_init(&adev->userq_mutex); amdgpu_device_init_apu_flags(adev); @@ -4329,12 +4394,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); + xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); + INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_LIST_HEAD(&adev->userq_mgr_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -5003,8 +5072,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) + if (!adev->in_s0ix) { amdgpu_amdkfd_suspend(adev, adev->in_runpm); + amdgpu_userq_suspend(adev); + } r = amdgpu_device_evict_resources(adev); if (r) @@ -5071,6 +5142,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; + + r = amdgpu_userq_resume(adev); + if (r) + goto exit; } r = amdgpu_device_ip_late_init(adev); @@ -5119,9 +5194,6 @@ exit: } adev->in_suspend = false; - if (adev->enable_mes) - amdgpu_mes_self_test(adev); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); @@ -5502,6 +5574,29 @@ mode1_reset_failed: return ret; } +int amdgpu_device_link_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + dev_info(adev->dev, "GPU link reset\n"); + + if (!adev->pcie_reset_ctx.occurs_dpc) + ret = amdgpu_dpm_link_reset(adev); + + if (ret) + goto link_reset_failed; + + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto link_reset_failed; + + return 0; + +link_reset_failed: + dev_err(adev->dev, "GPU link reset failed\n"); + return ret; +} + int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { @@ -5806,6 +5901,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: + case AMD_RESET_METHOD_LINK: adev->mp1_state = PP_MP1_STATE_SHUTDOWN; break; case AMD_RESET_METHOD_MODE2: @@ -5922,94 +6018,42 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu_device pointer - * @job: which job trigger hang - * @reset_context: amdgpu reset context pointer - * - * Attempt to reset the GPU if it has hung (all asics). - * Attempt to do soft-reset or full-reset and reinitialize Asic - * Returns 0 for success or an error on failure. - */ - -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, +static int amdgpu_device_halt_activities(struct amdgpu_device *adev, struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context) + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, + struct amdgpu_hive_info *hive, + bool need_emergency_restart) { - struct list_head device_list, *device_list_handle = NULL; - bool job_signaled = false; - struct amdgpu_hive_info *hive = NULL; + struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool need_emergency_restart = false; - bool audio_suspended = false; - int retry_limit = AMDGPU_MAX_RETRY_LIMIT; - /* - * If it reaches here because of hang/timeout and a RAS error is - * detected at the same time, let RAS recovery take care of it. - */ - if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && - !amdgpu_sriov_vf(adev) && - reset_context->src != AMDGPU_RESET_SRC_RAS) { - dev_dbg(adev->dev, - "Gpu recovery from source: %d yielding to RAS error recovery handling", - reset_context->src); - return 0; - } - /* - * Special case: RAS triggered and full reset isn't supported - */ - need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); - - /* - * Flush RAM to disk so that after reboot - * the user can read log and see why the system rebooted. - */ - if (need_emergency_restart && amdgpu_ras_get_context(adev) && - amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); - - ksys_sync_helper(); - emergency_restart(); - } - - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); - - if (!amdgpu_sriov_vf(adev)) - hive = amdgpu_get_xgmi_hive(adev); - if (hive) - mutex_lock(&hive->hive_lock); - - reset_context->job = job; - reset_context->hive = hive; /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. */ - INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - list_add_tail(&tmp_adev->reset_list, &device_list); + list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->pcie_reset_ctx.in_link_reset = true; } - if (!list_is_first(&adev->reset_list, &device_list)) - list_rotate_to_front(&adev->reset_list, &device_list); - device_list_handle = &device_list; + if (!list_is_first(&adev->reset_list, device_list)) + list_rotate_to_front(&adev->reset_list, device_list); + device_list_handle = device_list; } else { - list_add_tail(&adev->reset_list, &device_list); - device_list_handle = &device_list; + list_add_tail(&adev->reset_list, device_list); + device_list_handle = device_list; } - if (!amdgpu_sriov_vf(adev)) { + if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { r = amdgpu_device_health_check(device_list_handle); if (r) - goto end_reset; + return r; } /* We need to lock reset domain only once both for XGMI and single device */ @@ -6033,7 +6077,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * some audio codec errors. */ if (!amdgpu_device_suspend_display_audio(tmp_adev)) - audio_suspended = true; + tmp_adev->pcie_reset_ctx.audio_suspended = true; amdgpu_ras_set_error_query_ready(tmp_adev, false); @@ -6051,6 +6095,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* disable ras on ALL IPs */ if (!need_emergency_restart && + (!adev->pcie_reset_ctx.occurs_dpc) && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -6068,24 +6113,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, atomic_inc(&tmp_adev->gpu_reset_counter); } - if (need_emergency_restart) - goto skip_sched_resume; + return r; +} - /* - * Must check guilty signal here since after this point all old - * HW fences are force signaled. - * - * job->base holds a reference to parent fence - */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { - job_signaled = true; - dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); - goto skip_hw_reset; - } +static int amdgpu_device_asic_reset(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + int r = 0; retry: /* Rest of adevs pre asic reset from XGMI hive. */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); + if (adev->pcie_reset_ctx.occurs_dpc) + tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6097,6 +6142,11 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Actual ASIC resets if needed.*/ /* Host driver will handle XGMI hive reset for SRIOV */ if (amdgpu_sriov_vf(adev)) { + + /* Bail out of reset early */ + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); amdgpu_ras_set_fed(adev, true); @@ -6111,12 +6161,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (r) adev->asic_reset_res = r; } else { - r = amdgpu_do_asic_reset(device_list_handle, reset_context); + r = amdgpu_do_asic_reset(device_list, reset_context); if (r && r == -EAGAIN) goto retry; } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { /* * Drop any pending non scheduler resets queued before reset is done. * Any reset scheduled after this point would be valid. Scheduler resets @@ -6126,10 +6176,18 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ amdgpu_device_stop_pending_resets(tmp_adev); } -skip_hw_reset: + return r; +} + +static int amdgpu_device_sched_resume(struct list_head *device_list, + struct amdgpu_reset_context *reset_context, + bool job_signaled) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* Post ASIC reset for all devs .*/ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + list_for_each_entry(tmp_adev, device_list, reset_list) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -6165,8 +6223,16 @@ skip_hw_reset: } } -skip_sched_resume: - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + return r; +} + +static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, + struct list_head *device_list, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + + list_for_each_entry(tmp_adev, device_list, reset_list) { /* unlock kfd: SRIOV would do it separately */ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); @@ -6177,18 +6243,114 @@ skip_sched_resume: if (!adev->kfd.init_complete) amdgpu_amdkfd_device_init(adev); - if (audio_suspended) + if (tmp_adev->pcie_reset_ctx.audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_unset_mp1_state(tmp_adev); amdgpu_ras_set_error_query_ready(tmp_adev, true); + } - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + tmp_adev = list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + + +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu_device pointer + * @job: which job trigger hang + * @reset_context: amdgpu reset context pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) +{ + struct list_head device_list; + bool job_signaled = false; + struct amdgpu_hive_info *hive = NULL; + int r = 0; + bool need_emergency_restart = false; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + + /* + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { + DRM_WARN("Emergency reboot."); + + ksys_sync_helper(); + emergency_restart(); + } + + dev_info(adev->dev, "GPU %s begin!\n", + need_emergency_restart ? "jobs stop":"reset"); + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + + reset_context->job = job; + reset_context->hive = hive; + INIT_LIST_HEAD(&device_list); + + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); + if (r) + goto end_reset; + + if (need_emergency_restart) + goto skip_sched_resume; + /* + * Must check guilty signal here since after this point all old + * HW fences are force signaled. + * + * job->base holds a reference to parent fence + */ + if (job && dma_fence_is_signaled(&job->hw_fence)) { + job_signaled = true; + dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); + goto skip_hw_reset; + } + + r = amdgpu_device_asic_reset(adev, &device_list, reset_context); + if (r) + goto end_reset; +skip_hw_reset: + r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); + if (r) + goto end_reset; +skip_sched_resume: + amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6572,12 +6734,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_reset_context reset_context; + struct list_head device_list; + int r = 0; - DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); + dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (adev->gmc.xgmi.num_physical_nodes > 1) { - DRM_WARN("No support for XGMI hive yet..."); + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, "No support for XGMI hive yet...\n"); return PCI_ERS_RESULT_DISCONNECT; } @@ -6585,32 +6750,30 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta switch (state) { case pci_channel_io_normal: + dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); return PCI_ERS_RESULT_CAN_RECOVER; - /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: - /* - * Locking adev->reset_domain->sem will prevent any external access - * to GPU during PCI error recovery - */ - amdgpu_device_lock_reset_domain(adev->reset_domain); - amdgpu_device_set_mp1_state(adev); + /* Fatal error, prepare for slot reset */ + dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); - /* - * Block any work scheduling as we do for regular GPU reset - * for the duration of the recovery - */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + if (hive) + mutex_lock(&hive->hive_lock); + adev->pcie_reset_ctx.occurs_dpc = true; + memset(&reset_context, 0, sizeof(reset_context)); + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; - - drm_sched_stop(&ring->sched, NULL); + r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - atomic_inc(&adev->gpu_reset_counter); + if (r) + return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ + dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); return PCI_ERS_RESULT_DISCONNECT; } @@ -6623,8 +6786,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta */ pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) { + struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); - DRM_INFO("PCI error: mmio enabled callback!!\n"); + dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); /* TODO - dump whatever for debugging purposes */ @@ -6648,10 +6813,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int r, i; struct amdgpu_reset_context reset_context; - u32 memsize; + struct amdgpu_device *tmp_adev; + struct amdgpu_hive_info *hive; struct list_head device_list; + int r = 0, i; + u32 memsize; /* PCI error slot reset should be skipped During RAS recovery */ if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || @@ -6659,15 +6826,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) amdgpu_ras_in_recovery(adev)) return PCI_ERS_RESULT_RECOVERED; - DRM_INFO("PCI error: slot reset callback!!\n"); + dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - INIT_LIST_HEAD(&device_list); - list_add_tail(&adev->reset_list, &device_list); - /* wait for asic to come out of reset */ - msleep(500); + msleep(700); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6688,26 +6852,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + INIT_LIST_HEAD(&device_list); - adev->no_hw_access = true; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->no_hw_access = false; - if (r) - goto out; - - r = amdgpu_do_asic_reset(&device_list, &reset_context); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + reset_context.hive = hive; + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = true; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else { + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + list_add_tail(&adev->reset_list, &device_list); + } + r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); out: if (!r) { if (amdgpu_device_cache_pci_state(adev->pdev)) pci_restore_state(adev->pdev); - - DRM_INFO("PCIe error recovery succeeded\n"); + dev_info(adev->dev, "PCIe error recovery succeeded\n"); } else { - DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); + dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); + if (hive) { + list_for_each_entry(tmp_adev, &device_list, reset_list) + amdgpu_device_unset_mp1_state(tmp_adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + } + } + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -6724,26 +6902,36 @@ void amdgpu_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - int i; + struct list_head device_list; + struct amdgpu_hive_info *hive = NULL; + struct amdgpu_device *tmp_adev = NULL; - - DRM_INFO("PCI error: resume callback!!\n"); + dev_info(adev->dev, "PCI error: resume callback!!\n"); /* Only continue execution for the case of pci_channel_io_frozen */ if (adev->pci_channel_state != pci_channel_io_frozen) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + INIT_LIST_HEAD(&device_list); - if (!amdgpu_ring_sched_ready(ring)) - continue; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + tmp_adev->pcie_reset_ctx.in_link_reset = false; + list_add_tail(&tmp_adev->reset_list, &device_list); + } + } else + list_add_tail(&adev->reset_list, &device_list); - drm_sched_start(&ring->sched, 0); + amdgpu_device_sched_resume(&device_list, NULL, NULL); + amdgpu_device_gpu_resume(adev, &device_list, false); + adev->pcie_reset_ctx.occurs_dpc = false; + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); } - - amdgpu_device_unset_mp1_state(adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 72c807f5822e..4ddd08ce8885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -51,6 +51,8 @@ #include "amdgpu_reset.h" #include "amdgpu_sched.h" #include "amdgpu_xgmi.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -123,9 +125,10 @@ * - 3.61.0 - Contains fix for RV/PCO compute queues * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size + * - 3.64.0 - Userq IP support query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 63 +#define KMS_DRIVER_MINOR 64 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -140,6 +143,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), + AMDGPU_DEBUG_VM_USERPTR = BIT(8), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -176,7 +180,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; -bool enforce_isolation; +int amdgpu_enforce_isolation = -1; int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer @@ -238,6 +242,8 @@ int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; +int amdgpu_rebar = -1; /* auto */ +int amdgpu_user_queue = -1; DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", @@ -1033,11 +1039,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); /** - * DOC: enforce_isolation (bool) - * enforce process isolation between graphics and compute via using the same reserved vmid. + * DOC: enforce_isolation (int) + * enforce process isolation between graphics and compute. + * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader) */ -module_param(enforce_isolation, bool, 0444); -MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444); +MODULE_PARM_DESC(enforce_isolation, +"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)"); /** * DOC: modeset (int) @@ -1096,6 +1104,28 @@ MODULE_PARM_DESC(wbrf, "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); module_param_named(wbrf, amdgpu_wbrf, int, 0444); +/** + * DOC: rebar (int) + * Allow BAR resizing. Disable this to prevent the driver from attempting + * to resize the BAR if the GPU supports it and there is available MMIO space. + * Note that this just prevents the driver from resizing the BAR. The BIOS + * may have already resized the BAR at boot time. + */ +MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(rebar, amdgpu_rebar, int, 0444); + +/** + * DOC: user_queue (int) + * Enable user queues on systems that support user queues. Possible values: + * + * - -1 = auto (ASIC specific default) + * - 0 = user queues disabled + * - 1 = user queues enabled and kernel queues enabled (if supported) + * - 2 = user queues enabled and kernel queues disabled + */ +MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)"); +module_param_named(user_queue, amdgpu_user_queue, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -2244,6 +2274,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: use vram for smu pool\n"); adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; } + if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) { + pr_info("debug: VM mode debug for userptr is enabled\n"); + adev->debug_vm_userptr = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2700,6 +2734,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) return 0; } +static int amdgpu_runtime_idle_check_userq(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + ret = -EBUSY; + goto done; + } + } +done: + mutex_unlock(&adev->userq_mutex); + + return ret; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2713,6 +2770,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + ret = amdgpu_runtime_idle_check_userq(dev); if (ret) return ret; @@ -2836,12 +2896,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) } ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + goto done; + ret = amdgpu_runtime_idle_check_userq(dev); +done: pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2893,7 +2971,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, @@ -2940,6 +3018,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { @@ -3030,6 +3111,10 @@ static int __init amdgpu_init(void) if (r) goto error_fence; + r = amdgpu_userq_fence_slab_init(); + if (r) + goto error_fence; + DRM_INFO("amdgpu kernel modesetting enabled.\n"); amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3061,6 +3146,7 @@ static void __exit amdgpu_exit(void) amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 000000000000..73b629b5f56f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include +#include "amdgpu.h" + +#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) +#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu_eviction_fence"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) +{ + struct amdgpu_eviction_fence *old_ef, *new_ef; + struct drm_gem_object *obj; + unsigned long index; + int ret; + + if (evf_mgr->ev_fence && + !dma_fence_is_signaled(&evf_mgr->ev_fence->base)) + return 0; + /* + * Steps to replace eviction fence: + * * lock all objects in exec (caller) + * * create a new eviction fence + * * update new eviction fence in evf_mgr + * * attach the new eviction fence to BOs + * * release the old fence + * * unlock the objects (caller) + */ + new_ef = amdgpu_eviction_fence_create(evf_mgr); + if (!new_ef) { + DRM_ERROR("Failed to create new eviction fence\n"); + return -ENOMEM; + } + + /* Update the eviction fence now */ + spin_lock(&evf_mgr->ev_fence_lock); + old_ef = evf_mgr->ev_fence; + evf_mgr->ev_fence = new_ef; + spin_unlock(&evf_mgr->ev_fence_lock); + + /* Attach the new fence */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (!bo) + continue; + ret = amdgpu_eviction_fence_attach(evf_mgr, bo); + if (ret) { + DRM_ERROR("Failed to attch new eviction fence\n"); + goto free_err; + } + } + + /* Free old fence */ + if (old_ef) + dma_fence_put(&old_ef->base); + return 0; + +free_err: + kfree(new_ef); + return ret; +} + +static void +amdgpu_eviction_fence_suspend_worker(struct work_struct *work) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); + struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_eviction_fence *ev_fence; + + mutex_lock(&uq_mgr->userq_mutex); + ev_fence = evf_mgr->ev_fence; + if (!ev_fence) + goto unlock; + + amdgpu_userq_evict(uq_mgr, ev_fence); + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + +static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr; + struct amdgpu_eviction_fence *ev_fence; + + if (!f) + return true; + + ev_fence = to_ev_fence(f); + evf_mgr = ev_fence->evf_mgr; + + schedule_delayed_work(&evf_mgr->suspend_work, 0); + return true; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* Wait for any pending work to execute */ + flush_delayed_work(&evf_mgr->suspend_work); + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + dma_fence_wait(&ev_fence->base, false); + + /* Last unref of ev_fence */ + dma_fence_put(&ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) + dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP); + spin_unlock(&evf_mgr->ev_fence_lock); + + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 000000000000..fcd867b7147d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; + struct delayed_work suspend_work; + uint8_t fd_closing; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 69429df09477..2c68118fe9fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_display.h" @@ -44,6 +45,114 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_add_input_fence(struct drm_file *filp, + uint64_t syncobj_handles_array, + uint32_t num_syncobj_handles) +{ + struct dma_fence *fence; + uint32_t *syncobj_handles; + int ret, i; + + if (!num_syncobj_handles) + return 0; + + syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), + sizeof(uint32_t) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + for (i = 0; i < num_syncobj_handles; i++) { + + if (!syncobj_handles[i]) { + ret = -EINVAL; + goto free_memdup; + } + + ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence); + if (ret) + goto free_memdup; + + dma_fence_wait(fence, false); + + /* TODO: optimize async handling */ + dma_fence_put(fence); + } + +free_memdup: + kfree(syncobj_handles); + return ret; +} + +static int +amdgpu_gem_update_timeline_node(struct drm_file *filp, + uint32_t syncobj_handle, + uint64_t point, + struct drm_syncobj **syncobj, + struct dma_fence_chain **chain) +{ + if (!syncobj_handle) + return 0; + + /* Find the sync object */ + *syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!*syncobj) + return -ENOENT; + + if (!point) + return 0; + + /* Allocate the chain node */ + *chain = dma_fence_chain_alloc(); + if (!*chain) { + drm_syncobj_put(*syncobj); + return -ENOMEM; + } + + return 0; +} + +static void +amdgpu_gem_update_bo_mapping(struct drm_file *filp, + struct amdgpu_bo_va *bo_va, + uint32_t operation, + uint64_t point, + struct dma_fence *fence, + struct drm_syncobj *syncobj, + struct dma_fence_chain *chain) +{ + struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct dma_fence *last_update; + + if (!syncobj) + return; + + /* Find the last update fence */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + last_update = fence; + break; + default: + return; + } + + /* Add fence to timeline */ + if (!point) + drm_syncobj_replace_fence(syncobj, last_update); + else + drm_syncobj_add_point(syncobj, chain, last_update, point); +} + static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -184,6 +293,15 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + + /* attach gfx eviction fence */ + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); + if (r) { + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + amdgpu_bo_unreserve(abo); + return r; + } + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -247,6 +365,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; @@ -321,10 +442,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; - /* reject DOORBELLs until userspace code to use it is available */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) - return -EINVAL; - /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | @@ -638,18 +755,23 @@ out: * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. + * + * Returns resulting fence if freed BO(s) got cleared from the PT. + * otherwise stub fence in case of error. */ -static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_va *bo_va, - uint32_t operation) +static struct dma_fence * +amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va *bo_va, + uint32_t operation) { + struct dma_fence *fence = dma_fence_get_stub(); int r; if (!amdgpu_vm_ready(vm)) - return; + return fence; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r) goto error; @@ -665,6 +787,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); + + return fence; } /** @@ -713,6 +837,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; + struct drm_syncobj *timeline_syncobj = NULL; + struct dma_fence_chain *timeline_chain = NULL; + struct dma_fence *fence; struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; @@ -774,6 +901,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + r = amdgpu_gem_add_input_fence(filp, + args->input_fence_syncobj_handles, + args->num_syncobj_handles); + if (r) + goto error_put_gobj; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -802,6 +935,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -827,12 +968,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, - args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + args->operation); + + if (timeline_syncobj) + amdgpu_gem_update_bo_mapping(filp, bo_va, + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + + } error: drm_exec_fini(&exec); +error_put_gobj: drm_gem_object_put(gobj); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index cf2df7790077..1db1e6ec0184 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -33,6 +33,7 @@ #include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" +#include "nvd.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -74,14 +75,15 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, - int me, int pipe, int queue) +static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) { + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ int bit = 0; bit += me * adev->gfx.me.num_pipe_per_me - * adev->gfx.me.num_queue_per_pipe; - bit += pipe * adev->gfx.me.num_queue_per_pipe; + * num_queue_per_pipe; + bit += pipe * num_queue_per_pipe; bit += queue; return bit; @@ -238,8 +240,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); - int max_queues_per_me = adev->gfx.me.num_pipe_per_me * - adev->gfx.me.num_queue_per_pipe; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe; if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage @@ -247,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) for (i = 0; i < max_queues_per_me; i++) { pipe = i % adev->gfx.me.num_pipe_per_me; queue = (i / adev->gfx.me.num_pipe_per_me) % - adev->gfx.me.num_queue_per_pipe; + num_queue_per_pipe; - set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + set_bit(pipe * num_queue_per_pipe + queue, adev->gfx.me.queue_bitmap); } } else { @@ -258,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) } /* update the number of active graphics rings */ - adev->gfx.num_gfx_rings = - bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + if (adev->gfx.num_gfx_rings) + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); } static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, @@ -1351,6 +1354,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); int mode; + /* Only minimal precaution taken to reject requests while in reset.*/ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE); @@ -1394,8 +1401,14 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return -EINVAL; } + /* Don't allow a switch while under reset */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EPERM; + ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); + up_read(&adev->reset_domain->sem); + if (ret) return ret; @@ -1466,6 +1479,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; job->enforce_isolation = true; + /* always run the cleaner shader */ + job->run_cleaner_shader = true; ib = &job->ibs[0]; for (i = 0; i <= ring->funcs->align_mask; ++i) @@ -1552,6 +1567,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if (adev->gfx.disable_kq) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) @@ -1594,7 +1612,8 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' * feature for each GPU partition. Reading from the 'enforce_isolation' * sysfs file returns the isolation settings for all partitions, where '0' - * indicates disabled and '1' indicates enabled. + * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode, + * and '3' indicates enabled without cleaner shader. * * Return: The number of bytes read from the sysfs file. */ @@ -1629,9 +1648,12 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, * @count: The size of the input data * * This function allows control over the 'enforce_isolation' feature, which - * serializes access to the graphics engine. Writing '1' or '0' to the - * 'enforce_isolation' sysfs file enables or disables process isolation for - * each partition. The input should specify the setting for all partitions. + * serializes access to the graphics engine. Writing '0' to disable, '1' to + * enable isolation with cleaner shader, '2' to enable legacy isolation without + * cleaner shader, or '3' to enable process isolation without submitting the + * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode + * for each partition. The input should specify the setting for all + * partitions. * * Return: The number of bytes written to the sysfs file. */ @@ -1668,13 +1690,34 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, return -EINVAL; for (i = 0; i < num_partitions; i++) { - if (partition_values[i] != 0 && partition_values[i] != 1) + if (partition_values[i] != 0 && + partition_values[i] != 1 && + partition_values[i] != 2 && + partition_values[i] != 3) return -EINVAL; } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) - adev->enforce_isolation[i] = partition_values[i]; + for (i = 0; i < num_partitions; i++) { + switch (partition_values[i]) { + case 0: + default: + adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; + break; + case 1: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE; + break; + case 2: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; + break; + case 3: + adev->enforce_isolation[i] = + AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; + break; + } + } mutex_unlock(&adev->enforce_isolation_mutex); amdgpu_mes_update_enforce_isolation(adev); @@ -1923,39 +1966,41 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, bool enable) { - mutex_lock(&adev->gfx.kfd_sch_mutex); + mutex_lock(&adev->gfx.userq_sch_mutex); if (enable) { /* If the count is already 0, it means there's an imbalance bug somewhere. * Note that the bug may be in a different caller than the one which triggers the * WARN_ON_ONCE. */ - if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) { dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); goto unlock; } - adev->gfx.kfd_sch_req_count[idx]--; + adev->gfx.userq_sch_req_count[idx]--; - if (adev->gfx.kfd_sch_req_count[idx] == 0 && - adev->gfx.kfd_sch_inactive[idx]) { + if (adev->gfx.userq_sch_req_count[idx] == 0 && + adev->gfx.userq_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { - if (adev->gfx.kfd_sch_req_count[idx] == 0) { + if (adev->gfx.userq_sch_req_count[idx] == 0) { cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); - if (!adev->gfx.kfd_sch_inactive[idx]) { - amdgpu_amdkfd_stop_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = true; + if (!adev->gfx.userq_sch_inactive[idx]) { + amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.userq_sch_inactive[idx] = true; } } - adev->gfx.kfd_sch_req_count[idx]++; + adev->gfx.userq_sch_req_count[idx]++; } unlock: - mutex_unlock(&adev->gfx.kfd_sch_mutex); + mutex_unlock(&adev->gfx.userq_sch_mutex); } /** @@ -2000,12 +2045,13 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ - if (adev->kfd.init_complete) { - WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); - WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]); + + amdgpu_userq_start_sched_for_enforce_isolation(adev, idx); + if (adev->kfd.init_complete) amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; - } + adev->gfx.userq_sch_inactive[idx] = false; } mutex_unlock(&adev->enforce_isolation_mutex); } @@ -2029,7 +2075,7 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, bool wait = false; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { /* set the initial values if nothing is set */ if (!adev->gfx.enforce_isolation_jiffies[idx]) { adev->gfx.enforce_isolation_jiffies[idx] = jiffies; @@ -2096,7 +2142,7 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2133,7 +2179,7 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) return; mutex_lock(&adev->enforce_isolation_mutex); - if (adev->enforce_isolation[idx]) { + if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) { if (adev->kfd.init_complete) sched_work = true; } @@ -2217,6 +2263,74 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); } +/** + * amdgpu_gfx_csb_preamble_start - Set CSB preamble start + * + * @buffer: This is an output variable that gets the PACKET3 preamble setup. + * + * Return: + * return the latest index. + */ +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) +{ + u32 count = 0; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + return count; +} + +/** + * amdgpu_gfx_csb_data_parser - Parser CS data + * + * @adev: amdgpu_device pointer used to get the CS data and other gfx info. + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + * + * Return: + * return the latest index. + */ +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count) +{ + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + u32 i; + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); + + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } + } + } + + return count; +} + +/** + * amdgpu_gfx_csb_preamble_end - Set CSB preamble end + * + * @buffer: This is an output variable that gets the PACKET3 preamble end. + * @count: Index to start set the preemble end. + */ +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count) +{ + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + /* * debugfs for to enable/disable gfx job submission to specific core. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 87e862188766..08f268dab8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -170,10 +170,46 @@ struct amdgpu_kiq { #define AMDGPU_GFX_MAX_SE 4 #define AMDGPU_GFX_MAX_SH_PER_SE 2 +/** + * amdgpu_rb_config - Configure a single Render Backend (RB) + * + * Bad RBs are fused off and there is a harvest register the driver reads to + * determine which RB(s) are fused off so that the driver can configure the + * hardware state so that nothing gets sent to them. There are also user + * harvest registers that the driver can program to disable additional RBs, + * etc., for testing purposes. + */ struct amdgpu_rb_config { + /** + * @rb_backend_disable: + * + * The value captured from register RB_BACKEND_DISABLE indicates if the + * RB backend is disabled or not. + */ uint32_t rb_backend_disable; + + /** + * @user_rb_backend_disable: + * + * The value captured from register USER_RB_BACKEND_DISABLE indicates + * if the User RB backend is disabled or not. + */ uint32_t user_rb_backend_disable; + + /** + * @raster_config: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the first register. + */ uint32_t raster_config; + + /** + * @raster_config_1: + * + * To set up all of the states, it is necessary to have two registers + * to keep all of the states. This field holds the second register. + */ uint32_t raster_config_1; }; @@ -221,6 +257,13 @@ struct amdgpu_gfx_config { uint32_t macrotile_mode_array[16]; struct gb_addr_config gb_addr_config_fields; + + /** + * @rb_config: + * + * Matrix that keeps all the Render Backend (color and depth buffer + * handling) configuration on the 3D engine. + */ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; /* gfx configure feature */ @@ -305,7 +348,8 @@ struct amdgpu_gfx_funcs { void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, - struct amdgpu_gfx_shadow_info *shadow_info); + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, @@ -474,9 +518,9 @@ struct amdgpu_gfx { bool enable_cleaner_shader; struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; /* Mutex for synchronizing KFD scheduler operations */ - struct mutex kfd_sch_mutex; - u64 kfd_sch_req_count[MAX_XCP]; - bool kfd_sch_inactive[MAX_XCP]; + struct mutex userq_sch_mutex; + u64 userq_sch_req_count[MAX_XCP]; + bool userq_sch_inactive[MAX_XCP]; unsigned long enforce_isolation_jiffies[MAX_XCP]; unsigned long enforce_isolation_time[MAX_XCP]; @@ -484,6 +528,9 @@ struct amdgpu_gfx { struct delayed_work idle_work; bool workload_profile_active; struct mutex workload_profile_mutex; + + bool disable_kq; + bool disable_uq; }; struct amdgpu_gfx_ras_reg_entry { @@ -503,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry { #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) -#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -550,8 +597,6 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); -int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, - int pipe, int queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); @@ -597,6 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); +u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer); +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count); +void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count); void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ecb74ccf1d90..6b0fbbb91e57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1230,6 +1230,10 @@ static ssize_t current_memory_partition_show( struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_memory_partition mode; + /* Only minimal precaution taken to reject requests while in reset */ + if (amdgpu_in_reset(adev)) + return -EPERM; + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); if ((mode >= ARRAY_SIZE(nps_desc)) || (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index bd7fc123b8f9..80fa29c26e9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -62,6 +62,9 @@ */ #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL +/* XNACK flags */ +#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0) + struct firmware; enum amdgpu_memory_partition { @@ -301,6 +304,7 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; int noretry; + uint32_t xnack_flags; uint32_t vmid0_page_table_block_size; uint32_t vmid0_page_table_depth; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index b6cf801939aa..6e02fb9ac2f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_ras.h" +#include int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) { @@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev) /* hdp ras follows amdgpu_ras_block_late_init_default for late init */ return 0; } + +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32((adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } else { + amdgpu_ring_emit_wreg(ring, + (adev->rmmio_remap.reg_offset + + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> + 2, + 0); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 7b8a6152dc8d..4cfd932b7e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -44,4 +44,6 @@ struct amdgpu_hdp { }; int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev); +void amdgpu_hdp_generic_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec60220..802743efa3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow = false; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4c4e087230ac..5dd78a9cb12d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -576,8 +576,16 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); id_mgr->reserved_use_count = 0; - /* manage only VMIDs not used by KFD */ - id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + else if (AMDGPU_IS_MMHUB0(i) || + AMDGPU_IS_MMHUB1(i)) + id_mgr->num_ids = 16; + else + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -588,7 +596,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } /* alloc a default reserved vmid to enforce isolation */ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 901f8b12c672..30f16968b578 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_ih.h" +#include "amdgpu_reset.h" /** * amdgpu_ih_ring_init - initialize the IH state @@ -227,13 +228,23 @@ restart_ih: ih->rptr &= ih->ptr_mask; } - amdgpu_ih_set_rptr(adev, ih); + if (!ih->overflow) + amdgpu_ih_set_rptr(adev, ih); + wake_up_all(&ih->wait_process); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev, ih); if (wptr != ih->rptr) - goto restart_ih; + if (!ih->overflow) + goto restart_ih; + + if (ih->overflow) + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index b0a88f92cd82..7f7ea046e209 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,7 @@ struct amdgpu_ih_ring { /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; uint64_t processed_timestamp; + bool overflow; }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 38e7043016e1..13c60cac4261 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -619,6 +619,10 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type) { + /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */ + if (amdgpu_ras_is_rma(adev)) + return -EINVAL; + if (!adev->irq.installed) return -ENOENT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967ff..f2c049129661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -78,6 +78,7 @@ struct amdgpu_job { /* enforce isolation */ bool enforce_isolation; + bool run_cleaner_shader; uint32_t num_ibs; struct amdgpu_ib ibs[]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 27bfe9c8af06..9fbb04aee97b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" #include "amdgpu_reset.h" #include "amd_pcie.h" +#include "amdgpu_userq.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -370,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, return 0; } +static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_gfx *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->shadow_size = shadow.shadow_size; + meta->shadow_alignment = shadow.shadow_alignment; + meta->csa_size = shadow.csa_size; + meta->csa_alignment = shadow.csa_alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -387,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].sched.ready) + if (adev->gfx.gfx_ring[i].sched.ready && + !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -395,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].sched.ready) + if (adev->gfx.compute_ring[i].sched.ready && + !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -403,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.sched.ready) + if (adev->sdma.instance[i].ring.sched.ready && + !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -414,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.sched.ready) + if (adev->uvd.inst[i].ring.sched.ready && + !adev->uvd.inst[i].ring.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -423,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].sched.ready) + if (adev->vce.ring[i].sched.ready && + !adev->vce.ring[i].no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -435,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].sched.ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready && + !adev->uvd.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -447,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->vcn.inst[i].ring_dec.sched.ready) + if (adev->vcn.inst[i].ring_dec.sched.ready && + !adev->vcn.inst[i].ring_dec.no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -460,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) - if (adev->vcn.inst[i].ring_enc[j].sched.ready) + if (adev->vcn.inst[i].ring_enc[j].sched.ready && + !adev->vcn.inst[i].ring_enc[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -475,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) - if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready && + !adev->jpeg.inst[i].ring_dec[j].no_user_submission) ++num_rings; } ib_start_alignment = 256; @@ -483,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VPE: type = AMD_IP_BLOCK_TYPE_VPE; - if (adev->vpe.ring.sched.ready) + if (adev->vpe.ring.sched.ready && + !adev->vpe.ring.no_user_submission) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -978,6 +1009,8 @@ out: } } + dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; kfree(dev_info); @@ -1293,6 +1326,22 @@ out: return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; } + case AMDGPU_INFO_UQ_FW_AREAS: { + struct drm_amdgpu_info_uq_metadata meta_info = {}; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx); + if (ret) + return ret; + + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + default: + return -EINVAL; + } + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1376,6 +1425,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev); + if (r) + DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n"); + + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; @@ -1445,6 +1502,11 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + if (!fpriv->evf_mgr.fd_closing) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + } amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index fb212f0a1136..2febb63ab232 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - int ip_type, uint64_t *doorbell_index) -{ - unsigned int offset, found; - struct amdgpu_mes *mes = &adev->mes; - - if (ip_type == AMDGPU_RING_TYPE_SDMA) - offset = adev->doorbell_index.sdma_engine[0]; - else - offset = 0; - - found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); - if (found >= mes->num_mes_dbs) { - DRM_WARN("No doorbell available\n"); - return -ENOSPC; - } - - set_bit(found, mes->doorbell_bitmap); - - /* Get the absolute doorbell index on BAR */ - *doorbell_index = mes->db_start_dw_offset + found * 2; - return 0; -} - -static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - uint32_t doorbell_index) -{ - unsigned int old, rel_index; - struct amdgpu_mes *mes = &adev->mes; - - /* Find the relative index of the doorbell in this object */ - rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; - old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); - WARN_ON(!old); -} - static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { int i; @@ -126,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { - int i, r; + int i, r, num_pipes; adev->mes.adev = adev; @@ -142,19 +106,52 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; - adev->mes.vmid_mask_gfxhub = 0xffffff00; + adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00; - for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; + if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) + dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_GFX_PIPES); + + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { + if (i >= num_pipes) break; - adev->mes.compute_hqd_mask[i] = 0xc; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(12, 0, 0)) + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE; + else + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + */ + adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2; } - for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) - adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; + num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; + if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) + dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); + + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { + if (i >= num_pipes) + break; + adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC; + } + + num_pipes = adev->sdma.num_instances; + if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) + dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (i >= adev->sdma.num_instances) + if (i >= num_pipes) break; adev->mes.sdma_hqd_mask[i] = 0xfc; } @@ -240,244 +237,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) mutex_destroy(&adev->mes.mutex_hidden); } -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); -} - -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm) -{ - struct amdgpu_mes_process *process; - int r; - - /* allocate the mes process buffer */ - process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); - if (!process) { - DRM_ERROR("no more memory to create mes process\n"); - return -ENOMEM; - } - - /* allocate the process context bo and map it */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_memory; - } - memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_ctx; - } - - INIT_LIST_HEAD(&process->gang_list); - process->vm = vm; - process->pasid = pasid; - process->process_quantum = adev->mes.default_process_quantum; - process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); -clean_up_memory: - kfree(process); - return r; -} - -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang, *tmp1; - struct amdgpu_mes_queue *queue, *tmp2; - struct mes_remove_queue_input queue_input; - unsigned long flags; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_WARN("pasid %d doesn't exist\n", pasid); - amdgpu_mes_unlock(&adev->mes); - return; - } - - /* Remove all queues from hardware */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, - &queue_input); - if (r) - DRM_WARN("failed to remove hardware queue\n"); - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - } - - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); - - /* free all memory allocated by the process */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - amdgpu_mes_queue_free_mqd(queue); - list_del(&queue->list); - kfree(queue); - } - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); - kfree(gang); - - } - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - kfree(process); -} - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; - int r; - - /* allocate the mes gang buffer */ - gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); - if (!gang) { - return -ENOMEM; - } - - /* allocate the gang context bo and map it to cpu space */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_mem; - } - memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - r = -EINVAL; - goto clean_up_ctx; - } - - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to allocate idr for gang\n"); - goto clean_up_ctx; - } - - gang->gang_id = r; - *gang_id = r; - - INIT_LIST_HEAD(&gang->queue_list); - gang->process = process; - gang->priority = gprops->priority; - gang->gang_quantum = gprops->gang_quantum ? - gprops->gang_quantum : adev->mes.default_gang_quantum; - gang->global_priority_level = gprops->global_priority_level; - gang->inprocess_gang_priority = gprops->inprocess_gang_priority; - list_add_tail(&gang->list, &process->gang_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); -clean_up_mem: - kfree(gang); - return r; -} - -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) -{ - struct amdgpu_mes_gang *gang; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - - if (!list_empty(&gang->queue_list)) { - DRM_ERROR("queue list is not empty\n"); - amdgpu_mes_unlock(&adev->mes); - return -EBUSY; - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - list_del(&gang->list); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - - kfree(gang); - - return 0; -} - int amdgpu_mes_suspend(struct amdgpu_device *adev) { struct mes_suspend_gang_input input; @@ -526,304 +285,6 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) return r; } -static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - u32 mqd_size = mqd_mgr->mqd_size; - int r; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &q->mqd_obj, - &q->mqd_gpu_addr, &q->mqd_cpu_ptr); - if (r) { - dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); - return r; - } - memset(q->mqd_cpu_ptr, 0, mqd_size); - - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - struct amdgpu_mqd_prop mqd_prop = {0}; - - mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; - mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; - mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; - mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; - mqd_prop.queue_size = p->queue_size; - mqd_prop.use_doorbell = true; - mqd_prop.doorbell_index = p->doorbell_off; - mqd_prop.eop_gpu_addr = p->eop_gpu_addr; - mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; - mqd_prop.hqd_queue_priority = p->hqd_queue_priority; - mqd_prop.hqd_active = false; - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); - } - - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } - - amdgpu_bo_unreserve(q->mqd_obj); -} - -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id) -{ - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_add_queue_input queue_input; - unsigned long flags; - int r; - - memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); - - /* allocate the mes queue buffer */ - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); - if (!queue) { - DRM_ERROR("Failed to allocate memory for queue\n"); - return -ENOMEM; - } - - /* Allocate the queue mqd */ - r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); - if (r) - goto clean_up_memory; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - r = -EINVAL; - goto clean_up_mqd; - } - - /* add the mes gang to idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, - GFP_ATOMIC); - if (r < 0) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_mqd; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - *queue_id = queue->queue_id = r; - - /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, - qprops->queue_type, - &qprops->doorbell_off); - if (r) - goto clean_up_queue_id; - - /* initialize the queue mqd */ - amdgpu_mes_queue_init_mqd(adev, queue, qprops); - - /* add hw queue to mes */ - queue_input.process_id = gang->process->pasid; - - queue_input.page_table_base_addr = - adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - - adev->gmc.vram_start; - - queue_input.process_va_start = 0; - queue_input.process_va_end = - (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - queue_input.process_quantum = gang->process->process_quantum; - queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; - queue_input.gang_quantum = gang->gang_quantum; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; - queue_input.gang_global_priority_level = gang->global_priority_level; - queue_input.doorbell_offset = qprops->doorbell_off; - queue_input.mqd_addr = queue->mqd_gpu_addr; - queue_input.wptr_addr = qprops->wptr_gpu_addr; - queue_input.wptr_mc_addr = qprops->wptr_mc_addr; - queue_input.queue_type = qprops->queue_type; - queue_input.paging = qprops->paging; - queue_input.is_kfd_process = 0; - - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - if (r) { - DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", - qprops->doorbell_off); - goto clean_up_doorbell; - } - - DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " - "queue type=%d, doorbell=0x%llx\n", - gang->process->pasid, gang_id, qprops->queue_type, - qprops->doorbell_off); - - queue->ring = qprops->ring; - queue->doorbell_off = qprops->doorbell_off; - queue->wptr_gpu_addr = qprops->wptr_gpu_addr; - queue->queue_type = qprops->queue_type; - queue->paging = qprops->paging; - queue->gang = gang; - queue->ring->mqd_ptr = queue->mqd_cpu_ptr; - list_add_tail(&queue->list, &gang->queue_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); -clean_up_queue_id: - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); -clean_up_mqd: - amdgpu_mes_unlock(&adev->mes); - amdgpu_mes_queue_free_mqd(queue); -clean_up_memory: - kfree(queue); - return r; -} - -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_remove_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - - idr_remove(&adev->mes.queue_id_idr, queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to remove hardware queue, queue id = %d\n", - queue_id); - - list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_mes_queue_free_mqd(queue); - kfree(queue); - return 0; -} - -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_reset_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue, queue id = %d\n", - queue_id); - - amdgpu_mes_unlock(&adev->mes); - - return 0; -} - -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid) -{ - struct mes_reset_queue_input queue_input; - int r; - - queue_input.queue_type = queue_type; - queue_input.use_mmio = true; - queue_input.me_id = me_id; - queue_input.pipe_id = pipe_id; - queue_input.queue_id = queue_id; - queue_input.vmid = vmid; - r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n", - queue_id); - return r; -} - int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -874,7 +335,7 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, unsigned int vmid, bool use_mmio) { - struct mes_reset_legacy_queue_input queue_input; + struct mes_reset_queue_input queue_input; int r; memset(&queue_input, 0, sizeof(queue_input)); @@ -888,8 +349,11 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.wptr_addr = ring->wptr_gpu_addr; queue_input.vmid = vmid; queue_input.use_mmio = use_mmio; + queue_input.is_kq = true; + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) + queue_input.legacy_gfx = true; - r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); if (r) DRM_ERROR("failed to reset legacy queue\n"); @@ -905,7 +369,7 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) uint32_t *read_val_ptr; if (amdgpu_device_wb_get(adev, &addr_offset)) { - DRM_ERROR("critical bug! too many mes readers\n"); + dev_err(adev->dev, "critical bug! too many mes readers\n"); goto error; } read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); @@ -915,13 +379,13 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) op_input.read_reg.buffer_addr = read_val_gpu_addr; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes rreg is not supported!\n"); + dev_err(adev->dev, "mes rreg is not supported!\n"); goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to read reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to read reg (0x%x)\n", reg); else val = *(read_val_ptr); @@ -942,14 +406,14 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev, op_input.write_reg.reg_value = val; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes wreg is not supported!\n"); + dev_err(adev->dev, "mes wreg is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to write reg (0x%x)\n", reg); + dev_err(adev->dev, "failed to write reg (0x%x)\n", reg); error: return r; @@ -969,14 +433,14 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, op_input.wrm_reg.mask = mask; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); + dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); + dev_err(adev->dev, "failed to reg_write_reg_wait\n"); error: return r; @@ -994,14 +458,14 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, op_input.wrm_reg.mask = mask; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes reg wait is not supported!\n"); + dev_err(adev->dev, "mes reg wait is not supported!\n"); r = -EINVAL; goto error; } r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to reg_write_reg_wait\n"); + dev_err(adev->dev, "failed to reg_write_reg_wait\n"); error: return r; @@ -1075,25 +539,6 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, return r; } -static void -amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_mes_queue_properties *props) -{ - props->queue_type = ring->funcs->type; - props->hqd_base_gpu_addr = ring->gpu_addr; - props->rptr_gpu_addr = ring->rptr_gpu_addr; - props->wptr_gpu_addr = ring->wptr_gpu_addr; - props->wptr_mc_addr = - ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; - props->queue_size = ring->ring_size; - props->eop_gpu_addr = ring->eop_gpu_addr; - props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - props->paging = false; - props->ring = ring; -} - #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ do { \ if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ @@ -1130,453 +575,12 @@ int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) return -EINVAL; } -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang *gang; - struct amdgpu_mes_queue_properties qprops = {0}; - int r, queue_id, pasid; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - pasid = gang->process->pasid; - - ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); - if (!ring) { - amdgpu_mes_unlock(&adev->mes); - return -ENOMEM; - } - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->is_mes_queue = true; - ring->mes_ctx = ctx_data; - ring->idx = idx; - ring->no_scheduler = true; - - if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { - int offset = offsetof(struct amdgpu_mes_ctx_meta_data, - compute[ring->idx].mec_hpd); - ring->eop_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } - - switch (queue_type) { - case AMDGPU_RING_TYPE_GFX: - ring->funcs = adev->gfx.gfx_ring[0].funcs; - ring->me = adev->gfx.gfx_ring[0].me; - ring->pipe = adev->gfx.gfx_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_COMPUTE: - ring->funcs = adev->gfx.compute_ring[0].funcs; - ring->me = adev->gfx.compute_ring[0].me; - ring->pipe = adev->gfx.compute_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_SDMA: - ring->funcs = adev->sdma.instance[0].ring.funcs; - break; - default: - BUG(); - } - - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) { - amdgpu_mes_unlock(&adev->mes); - goto clean_up_memory; - } - - amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); - - dma_fence_wait(gang->process->vm->last_update, false); - dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); - amdgpu_mes_unlock(&adev->mes); - - r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); - if (r) - goto clean_up_ring; - - ring->hw_queue_id = queue_id; - ring->doorbell_index = qprops.doorbell_off; - - if (queue_type == AMDGPU_RING_TYPE_GFX) - sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, - queue_id); - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, - queue_id); - else - BUG(); - - *out = ring; - return 0; - -clean_up_ring: - amdgpu_ring_fini(ring); -clean_up_memory: - kfree(ring); - return r; -} - -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring) - return; - - amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - timer_delete_sync(&ring->fence_drv.fallback_timer); - amdgpu_ring_fini(ring); - kfree(ring); -} - uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio) { return adev->mes.aggregated_doorbells[prio]; } -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - int r; - - r = amdgpu_bo_create_kernel(adev, - sizeof(struct amdgpu_mes_ctx_meta_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); - return r; - } - - if (!ctx_data->meta_data_obj) - return -ENOMEM; - - memset(ctx_data->meta_data_ptr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data)); - - return 0; -} - -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) -{ - if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); -} - -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va; - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); - if (!bo_va) { - DRM_ERROR("failed to create bo_va for meta data BO\n"); - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data), - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error_del_bo_va; - } - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) { - DRM_ERROR("failed to update pdes on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - ctx_data->meta_data_va = bo_va; - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, bo_va); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; - struct amdgpu_bo *bo = ctx_data->meta_data_obj; - struct amdgpu_vm *vm = bo_va->base.vm; - struct dma_fence *fence; - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - amdgpu_vm_bo_del(adev, bo_va); - if (!amdgpu_vm_ready(vm)) - goto out_unlock; - - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); - if (r) - goto out_unlock; - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - - r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (r || !fence) - goto out_unlock; - - dma_fence_wait(fence, false); - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - -out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - drm_exec_fini(&exec); - - return r; -} - -static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, - int pasid, int *gang_id, - int queue_type, int num_queue, - struct amdgpu_ring **added_rings, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang_properties gprops = {0}; - int r, j; - - /* create a gang for the process */ - gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.gang_quantum = adev->mes.default_gang_quantum; - gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; - - r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id); - if (r) { - DRM_ERROR("failed to add gang\n"); - return r; - } - - /* create queues for the gang */ - for (j = 0; j < num_queue; j++) { - r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j, - ctx_data, &ring); - if (r) { - DRM_ERROR("failed to add ring\n"); - break; - } - - DRM_INFO("ring %s was added\n", ring->name); - added_rings[j] = ring; - } - - return 0; -} - -static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) -{ - struct amdgpu_ring *ring; - int i, r; - - for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) { - ring = added_rings[i]; - if (!ring) - continue; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - - r = amdgpu_ring_test_ib(ring, 1000 * 10); - if (r) { - DRM_DEV_ERROR(ring->adev->dev, - "ring %s ib test failed (%d)\n", - ring->name, r); - return r; - } else - DRM_INFO("ring %s ib test pass\n", ring->name); - } - - return 0; -} - -int amdgpu_mes_self_test(struct amdgpu_device *adev) -{ - struct amdgpu_vm *vm = NULL; - struct amdgpu_mes_ctx_data ctx_data = {0}; - struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL }; - int gang_ids[3] = {0}; - int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 }, - { AMDGPU_RING_TYPE_COMPUTE, 1 }, - { AMDGPU_RING_TYPE_SDMA, 1} }; - int i, r, pasid, k = 0; - - pasid = amdgpu_pasid_alloc(16); - if (pasid < 0) { - dev_warn(adev->dev, "No more PASIDs available!"); - pasid = 0; - } - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) { - r = -ENOMEM; - goto error_pasid; - } - - r = amdgpu_vm_init(adev, vm, -1); - if (r) { - DRM_ERROR("failed to initialize vm\n"); - goto error_pasid; - } - - r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); - if (r) { - DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_fini; - } - - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); - if (r) { - DRM_ERROR("failed to map ctx meta data\n"); - goto error_vm; - } - - r = amdgpu_mes_create_process(adev, pasid, vm); - if (r) { - DRM_ERROR("failed to create MES process\n"); - goto error_vm; - } - - for (i = 0; i < ARRAY_SIZE(queue_types); i++) { - /* On GFX v10.3, fw hasn't supported to map sdma queue. */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= - IP_VERSION(10, 3, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < - IP_VERSION(11, 0, 0) && - queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) - continue; - - r = amdgpu_mes_test_create_gang_and_queues(adev, pasid, - &gang_ids[i], - queue_types[i][0], - queue_types[i][1], - &added_rings[k], - &ctx_data); - if (r) - goto error_queues; - - k += queue_types[i][1]; - } - - /* start ring test and ib test for MES queues */ - amdgpu_mes_test_queues(added_rings); - -error_queues: - /* remove all queues */ - for (i = 0; i < ARRAY_SIZE(added_rings); i++) { - if (!added_rings[i]) - continue; - amdgpu_mes_remove_ring(adev, added_rings[i]); - } - - for (i = 0; i < ARRAY_SIZE(gang_ids); i++) { - if (!gang_ids[i]) - continue; - amdgpu_mes_remove_gang(adev, gang_ids[i]); - } - - amdgpu_mes_destroy_process(adev, pasid); - -error_vm: - amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); - -error_fini: - amdgpu_vm_fini(adev, vm); - -error_pasid: - if (pasid) - amdgpu_pasid_free(pasid); - - amdgpu_mes_ctx_free_meta_data(&ctx_data); - kfree(vm); - return 0; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; @@ -1705,7 +709,7 @@ int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { mutex_lock(&adev->enforce_isolation_mutex); for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i]) + if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE) r |= amdgpu_mes_set_enforce_isolation(adev, i, true); else r |= amdgpu_mes_set_enforce_isolation(adev, i, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index da2c9a8cb3e0..a41f65b4f733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -111,8 +111,8 @@ struct amdgpu_mes { uint32_t vmid_mask_gfxhub; uint32_t vmid_mask_mmhub; - uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; + uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; @@ -149,19 +149,6 @@ struct amdgpu_mes { }; -struct amdgpu_mes_process { - int pasid; - struct amdgpu_vm *vm; - uint64_t pd_gpu_addr; - struct amdgpu_bo *proc_ctx_bo; - uint64_t proc_ctx_gpu_addr; - void *proc_ctx_cpu_ptr; - uint64_t process_quantum; - struct list_head gang_list; - uint32_t doorbell_index; - struct mutex doorbell_lock; -}; - struct amdgpu_mes_gang { int gang_id; int priority; @@ -248,18 +235,6 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; -struct mes_reset_queue_input { - uint32_t doorbell_offset; - uint64_t gang_context_addr; - bool use_mmio; - uint32_t queue_type; - uint32_t me_id; - uint32_t pipe_id; - uint32_t queue_id; - uint32_t xcc_id; - uint32_t vmid; -}; - struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -291,7 +266,7 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; -struct mes_reset_legacy_queue_input { +struct mes_reset_queue_input { uint32_t queue_type; uint32_t doorbell_offset; bool use_mmio; @@ -301,6 +276,8 @@ struct mes_reset_legacy_queue_input { uint64_t mqd_addr; uint64_t wptr_addr; uint32_t vmid; + bool legacy_gfx; + bool is_kq; }; enum mes_misc_opcode { @@ -388,9 +365,6 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); - int (*reset_legacy_queue)(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input); - int (*reset_hw_queue)(struct amdgpu_mes *mes, struct mes_reset_queue_input *input); }; @@ -404,26 +378,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); void amdgpu_mes_fini(struct amdgpu_device *adev); -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm); -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid); - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id); -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id); - int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id); -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); -int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, - int me_id, int pipe_id, int queue_id, int vmid); - int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, @@ -451,27 +408,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, bool trap_en); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr); -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out); -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); - -int amdgpu_mes_self_test(struct amdgpu_device *adev); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0b9987781f76..73403744331a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1044,7 +1044,8 @@ static const char * const amdgpu_vram_names[] = { "GDDR6", "DDR5", "LPDDR4", - "LPDDR5" + "LPDDR5", + "HBM3E" }; /** @@ -1644,7 +1645,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); - + /* Add the gem obj resv fence dump*/ + if (dma_resv_trylock(bo->tbo.base.resv)) { + dma_resv_describe(bo->tbo.base.resv, m); + dma_resv_unlock(bo->tbo.base.resv); + } seq_puts(m, "\n"); return size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index df5d5dbd7f0f..e6f0b035e20b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2214,7 +2214,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { - dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); + dev_info(psp->adev->dev, + "SECUREDISPLAY: optional securedisplay ta ucode is not available\n"); return 0; } @@ -4185,6 +4186,110 @@ const struct attribute_group amdgpu_flash_attr_group = { .is_visible = amdgpu_flash_attr_is_visible, }; +#if defined(CONFIG_DEBUG_FS) +static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet; + int ret; + + /* serialize the open() file calling */ + if (!mutex_trylock(&adev->psp.mutex)) + return -EBUSY; + + /* + * make sure only one userpace process is alive for dumping so that + * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is consumed. + * let's say the case where one process try opening the file while + * another one has proceeded to read or release. In this way, eliminate + * the use of mutex for read() or release() callback as well. + */ + if (adev->psp.spirom_dump_trip) { + mutex_unlock(&adev->psp.mutex); + return -EBUSY; + } + + bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL); + if (!bo_triplet) { + mutex_unlock(&adev->psp.mutex); + return -ENOMEM; + } + + ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2, + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &bo_triplet->bo, + &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); + if (ret) + goto rel_trip; + + ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr); + if (ret) + goto rel_bo; + + adev->psp.spirom_dump_trip = bo_triplet; + mutex_unlock(&adev->psp.mutex); + return 0; +rel_bo: + amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); +rel_trip: + kfree(bo_triplet); + mutex_unlock(&adev->psp.mutex); + dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret); + return ret; +} + +static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size, + loff_t *pos) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip; + + if (!bo_triplet) + return -EINVAL; + + return simple_read_from_buffer(buf, + size, + pos, bo_triplet->cpu_addr, + AMD_VBIOS_FILE_MAX_SIZE_B * 2); +} + +static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp) +{ + struct amdgpu_device *adev = filp->f_inode->i_private; + struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip; + + if (bo_triplet) { + amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr, + &bo_triplet->cpu_addr); + kfree(bo_triplet); + } + + adev->psp.spirom_dump_trip = NULL; + return 0; +} + +static const struct file_operations psp_dump_spirom_debugfs_ops = { + .owner = THIS_MODULE, + .open = psp_read_spirom_debugfs_open, + .read = psp_read_spirom_debugfs_read, + .release = psp_read_spirom_debugfs_release, + .llseek = default_llseek, +}; +#endif + +void amdgpu_psp_debugfs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + + debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root, + adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2); +#endif +} + const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", .early_init = psp_early_init, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8d5acc415d38..428adc7f741d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -39,6 +39,18 @@ #define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 +/* VBIOS gfl defines */ +#define MBOX_READY_MASK 0x80000000 +#define MBOX_STATUS_MASK 0x0000FFFF +#define MBOX_COMMAND_MASK 0x00FF0000 +#define MBOX_READY_FLAG 0x80000000 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 +#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 +#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf +#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10 +#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11 + extern const struct attribute_group amdgpu_flash_attr_group; enum psp_shared_mem_size { @@ -107,6 +119,7 @@ enum psp_reg_prog_id { PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_MMHUB_L1_TLB_CNTL = 25, PSP_REG_LAST }; @@ -137,11 +150,14 @@ struct psp_funcs { int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver); int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); + int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); bool (*is_aux_sos_load_required)(struct psp_context *psp); bool (*is_reload_needed)(struct psp_context *psp); + int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); }; struct ta_funcs { @@ -319,6 +335,14 @@ struct psp_runtime_scpm_entry { enum psp_runtime_scpm_authentication scpm_status; }; +#if defined(CONFIG_DEBUG_FS) +struct spirom_bo { + struct amdgpu_bo *bo; + uint64_t mc_addr; + void *cpu_addr; +}; +#endif + struct psp_context { struct amdgpu_device *adev; struct psp_ring km_ring; @@ -406,6 +430,9 @@ struct psp_context { char *vbflash_tmp_buf; size_t vbflash_image_size; bool vbflash_done; +#if defined(CONFIG_DEBUG_FS) + struct spirom_bo *spirom_dump_trip; +#endif }; struct amdgpu_psp_funcs { @@ -464,6 +491,10 @@ struct amdgpu_psp_funcs { ((psp)->funcs->update_spirom ? \ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL) +#define psp_dump_spirom(psp, fw_pri_mc_addr) \ + ((psp)->funcs->dump_spirom ? \ + (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL) + #define psp_vbflash_status(psp) \ ((psp)->funcs->vbflash_stat ? \ (psp)->funcs->vbflash_stat((psp)) : -EINVAL) @@ -475,6 +506,10 @@ struct amdgpu_psp_funcs { #define psp_is_aux_sos_load_required(psp) \ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0) +#define psp_reg_program_no_ring(psp, val, id) \ + ((psp)->funcs->reg_program_no_ring ? \ + (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -569,5 +604,9 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp); int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); +int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id); +void amdgpu_psp_debugfs_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 443409d4f4b0..dc07936d2fcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1498,6 +1498,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; + if (amdgpu_sriov_vf(adev)) + return -EOPNOTSUPP; + /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) && ((smu_funcs && smu_funcs->set_debug_mode) || @@ -2161,7 +2164,7 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; - /** + /* * If the current interrupt is caused by a non-fatal RAS error, skip * check for fatal error. For fatal errors, FED status of all devices * in XGMI hive gets set when the first device gets fatal error @@ -2886,6 +2889,7 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) return -EINVAL; } + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit); } @@ -2900,7 +2904,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, &adev->psp.ras_context.ras->eeprom_control; enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; - uint32_t i; + uint32_t i = 0; if (!con || !con->eh_data || !bps || pages <= 0) return 0; @@ -2921,34 +2925,36 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); if (from_rom) { - for (i = 0; i < pages; i++) { - if (control->ras_num_recs - i >= adev->umc.retire_unit) { - if ((bps[i].address == bps[i + 1].address) && - (bps[i].mem_channel == bps[i + 1].mem_channel)) { - //deal with retire_unit records a time - ret = __amdgpu_ras_convert_rec_array_from_rom(adev, - &bps[i], &err_data, nps); - if (ret) - goto free; - i += (adev->umc.retire_unit - 1); + /* there is no pa recs in V3, so skip pa recs processing */ + if (control->tbl_hdr.version < RAS_TABLE_VER_V3) { + for (i = 0; i < pages; i++) { + if (control->ras_num_recs - i >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + /* deal with retire_unit records a time */ + ret = __amdgpu_ras_convert_rec_array_from_rom(adev, + &bps[i], &err_data, nps); + if (ret) + control->ras_num_bad_pages -= adev->umc.retire_unit; + i += (adev->umc.retire_unit - 1); + } else { + break; + } } else { break; } - } else { - break; } } for (; i < pages; i++) { ret = __amdgpu_ras_convert_rec_from_rom(adev, &bps[i], &err_data, nps); if (ret) - goto free; + control->ras_num_bad_pages -= adev->umc.retire_unit; } } else { ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); } -free: if (from_rom) kfree(err_data.err_addr); mutex_unlock(&con->recovery_lock); @@ -3037,21 +3043,28 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) dev_err(adev->dev, "Failed to load EEPROM table records!"); } else { if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { - for (i = 0; i < control->ras_num_recs; i++) { - if ((control->ras_num_recs - i) >= adev->umc.retire_unit) { - if ((bps[i].address == bps[i + 1].address) && - (bps[i].mem_channel == bps[i + 1].mem_channel)) { - control->ras_num_pa_recs += adev->umc.retire_unit; - i += (adev->umc.retire_unit - 1); + /*In V3, there is no pa recs, and some cases(when address==0) may be parsed + as pa recs, so add verion check to avoid it. + */ + if (control->tbl_hdr.version < RAS_TABLE_VER_V3) { + for (i = 0; i < control->ras_num_recs; i++) { + if ((control->ras_num_recs - i) >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + control->ras_num_pa_recs += adev->umc.retire_unit; + i += (adev->umc.retire_unit - 1); + } else { + control->ras_num_mca_recs += + (control->ras_num_recs - i); + break; + } } else { - control->ras_num_mca_recs += - (control->ras_num_recs - i); + control->ras_num_mca_recs += (control->ras_num_recs - i); break; } - } else { - control->ras_num_mca_recs += (control->ras_num_recs - i); - break; } + } else { + control->ras_num_mca_recs = control->ras_num_recs; } } @@ -3460,6 +3473,10 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) control->ras_num_pa_recs = control->ras_num_recs; + if (adev->umc.ras && + adev->umc.ras->get_retire_flip_bits) + adev->umc.ras->get_retire_flip_bits(adev); + if (control->ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) @@ -3793,10 +3810,12 @@ init_ras_enabled_flag: adev->ras_hw_enabled & amdgpu_ras_mask; /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ - adev->aca.is_enabled = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || - amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + if (!amdgpu_sriov_vf(adev)) { + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); + } /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && @@ -4479,8 +4498,11 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) enum ras_event_type type = RAS_EVENT_TYPE_FATAL; u64 event_id; - if (amdgpu_ras_mark_ras_event(adev, type)) + if (amdgpu_ras_mark_ras_event(adev, type)) { + dev_err(adev->dev, + "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n"); return; + } event_id = amdgpu_ras_acquire_event_id(adev, type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0ea7cfaf3587..2c58e09e56f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -418,6 +418,7 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control hdr->version = RAS_TABLE_VER_V2_1; return; case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 5, 0): hdr->version = RAS_TABLE_VER_V3; return; default: @@ -1392,17 +1393,39 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); - if (hdr->version >= RAS_TABLE_VER_V2_1) { + if (hdr->header != RAS_TABLE_HDR_VAL && + hdr->header != RAS_TABLE_HDR_BAD) { + dev_info(adev->dev, "Creating a new EEPROM table"); + return amdgpu_ras_eeprom_reset_table(control); + } + + switch (hdr->version) { + case RAS_TABLE_VER_V2_1: + case RAS_TABLE_VER_V3: control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; - } else { + break; + case RAS_TABLE_VER_V1: control->ras_num_recs = RAS_NUM_RECS(hdr); control->ras_record_offset = RAS_RECORD_START; control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + break; + default: + dev_err(adev->dev, + "RAS header invalid, unsupported version: %u", + hdr->version); + return -EINVAL; } - control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + if (control->ras_num_recs > control->ras_max_record_count) { + dev_err(adev->dev, + "RAS header invalid, records in header: %u max allowed :%u", + control->ras_num_recs, control->ras_max_record_count); + return -EINVAL; + } + + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); control->ras_num_mca_recs = 0; control->ras_num_pa_recs = 0; return 0; @@ -1413,7 +1436,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - int res; + int res = 0; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1494,10 +1517,6 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) "User defined threshold is set, runtime service will be halt when threshold is reached\n"); } } - } else { - DRM_INFO("Creating a new EEPROM table"); - - res = amdgpu_ras_eeprom_reset_table(control); } return res < 0 ? res : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 59acdbfe28d8..426834806fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. @@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->cached_rptr = 0; /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); - - } else if (ring->ring_obj == NULL) { + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b66385223..b95b47110769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -164,8 +164,24 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, /* provided by hw blocks that expose a ring buffer for commands */ struct amdgpu_ring_funcs { + /** + * @type: + * + * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER + * use ring buffers. The type field just identifies which component the + * ring buffer is associated with. + */ enum amdgpu_ring_type type; uint32_t align_mask; + + /** + * @nop: + * + * Every block in the amdgpu has no-op instructions (e.g., GFX 10 + * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP, + * etc). This field receives the specific no-op for the component + * that initializes the ring. + */ u32 nop; bool support_64bit_ptrs; bool no_user_fence; @@ -241,6 +257,9 @@ struct amdgpu_ring_funcs { bool (*is_guilty)(struct amdgpu_ring *ring); }; +/** + * amdgpu_ring - Holds ring information + */ struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; @@ -252,13 +271,61 @@ struct amdgpu_ring { unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; + + /** + * @wptr: + * + * This is part of the Ring buffer implementation and represents the + * write pointer. The wptr determines where the host has written. + */ u64 wptr; + + /** + * @wptr_old: + * + * Before update wptr with the new value, usually the old value is + * stored in the wptr_old. + */ u64 wptr_old; unsigned ring_size; + + /** + * @max_dw: + * + * Maximum number of DWords for ring allocation. This information is + * provided at the ring initialization time, and each IP block can + * specify a specific value. Check places that invoke + * amdgpu_ring_init() to see the maximum size per block. + */ unsigned max_dw; + + /** + * @count_dw: + * + * This value starts with the maximum amount of DWords supported by the + * ring. This value is updated based on the ring manipulation. + */ int count_dw; uint64_t gpu_addr; + + /** + * @ptr_mask: + * + * Some IPs provide support for 64-bit pointers and others for 32-bit + * only; this behavior is component-specific and defined by the field + * support_64bit_ptr. If the IP block supports 64-bits, the mask + * 0xffffffffffffffff is set; otherwise, this value assumes buf_mask. + * Notice that this field is used to keep wptr under a valid range. + */ uint64_t ptr_mask; + + /** + * @buf_mask: + * + * Buffer mask is a value used to keep wptr count under its + * thresholding. Buffer mask initialized during the ring buffer + * initialization time, and it is defined as (ring_size / 4) -1. + */ uint32_t buf_mask; u32 idx; u32 xcc_id; @@ -276,6 +343,13 @@ struct amdgpu_ring { bool use_pollmem; unsigned wptr_offs; u64 wptr_gpu_addr; + + /** + * @wptr_cpu_addr: + * + * This is the CPU address pointer in the writeback slot. This is used + * to commit changes to the GPU. + */ volatile u32 *wptr_cpu_addr; unsigned fence_offs; u64 fence_gpu_addr; @@ -297,20 +371,15 @@ struct amdgpu_ring { struct dma_fence *vmid_wait; bool has_compute_vm_bug; bool no_scheduler; + bool no_user_submission; int hw_prio; unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ uint64_t cached_rptr; - }; #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -435,15 +504,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index fce22d3f816b..c210625be220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -237,6 +237,20 @@ struct amdgpu_rlc_funcs { void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id); int (*init)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev); + + /** + * @get_csb_buffer: Get the clear state to be put into the hardware. + * + * The parameter adev is used to get the CS data and other gfx info, + * and buffer is the RLC CS pointer + * + * Sometimes, the user space puts a request to clear the state in the + * command buffer; this function provides the clear state that gets put + * into the hardware. Note that the driver programs Clear State + * Indirect Buffer (CSB) explicitly when it sets up the kernel rings, + * and it also provides a pointer to it which is used by the firmware + * to load the clear state in some cases. + */ void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); int (*get_cp_table_num)(struct amdgpu_device *adev); int (*resume)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 529c9696c2f3..6716ac281c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -26,6 +26,8 @@ #include "amdgpu_sdma.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_3_0_sh_mask.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -76,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } @@ -537,28 +531,38 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin return false; } -/** - * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks - * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions - * - * This function allows KFD and AMDGPU to register their own callbacks for handling - * pre-reset and post-reset operations for engine reset. These are needed because engine - * reset will stop all queues on that engine. - */ -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs) +static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) { - if (!funcs) - return; + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; + int r = -EOPNOTSUPP; - /* Ensure the reset_callback_list is initialized */ - if (!adev->sdma.reset_callback_list.next) { - INIT_LIST_HEAD(&adev->sdma.reset_callback_list); + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(4, 4, 2): + case IP_VERSION(4, 4, 4): + case IP_VERSION(4, 4, 5): + /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ + r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + break; + case IP_VERSION(5, 0, 0): + case IP_VERSION(5, 0, 1): + case IP_VERSION(5, 0, 2): + case IP_VERSION(5, 0, 5): + case IP_VERSION(5, 2, 0): + case IP_VERSION(5, 2, 2): + case IP_VERSION(5, 2, 4): + case IP_VERSION(5, 2, 5): + case IP_VERSION(5, 2, 6): + case IP_VERSION(5, 2, 3): + case IP_VERSION(5, 2, 1): + case IP_VERSION(5, 2, 7): + if (sdma_instance->funcs->soft_reset_kernel_queue) + r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); + break; + default: + break; } - /* Initialize the list node in the callback structure */ - INIT_LIST_HEAD(&funcs->list); - /* Add the callback structure to the global list */ - list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); + return r; } /** @@ -566,16 +570,10 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct * @adev: Pointer to the AMDGPU device * @instance_id: ID of the SDMA engine instance to reset * - * This function performs the following steps: - * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. - * 2. Resets the specified SDMA engine instance. - * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. - * * Returns: 0 on success, or a negative error code on failure. */ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) { - struct sdma_on_reset_funcs *funcs; int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; @@ -597,38 +595,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) page_sched_stopped = true; } - /* Invoke all registered pre_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->pre_reset) { - ret = funcs->pre_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "beforeReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->stop_kernel_queue) + sdma_instance->funcs->stop_kernel_queue(gfx_ring); /* Perform the SDMA reset for the specified instance */ - ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); goto exit; } - /* Invoke all registered post_reset callbacks */ - list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { - if (funcs->post_reset) { - ret = funcs->post_reset(adev, instance_id); - if (ret) { - dev_err(adev->dev, - "afterReset callback failed for instance %u: %d\n", - instance_id, ret); - goto exit; - } - } - } + if (sdma_instance->funcs->start_kernel_queue) + sdma_instance->funcs->start_kernel_queue(gfx_ring); exit: /* Restart the scheduler's work queue for the GFX and page rings diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 47d56fd0589f..5605921212f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -50,6 +50,12 @@ enum amdgpu_sdma_irq { #define NUM_SDMA(x) hweight32(x) +struct amdgpu_sdma_funcs { + int (*stop_kernel_queue)(struct amdgpu_ring *ring); + int (*start_kernel_queue)(struct amdgpu_ring *ring); + int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id); +}; + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -68,7 +74,7 @@ struct amdgpu_sdma_instance { /* track guilty state of GFX and PAGE queues */ bool gfx_guilty; bool page_guilty; - + const struct amdgpu_sdma_funcs *funcs; }; enum amdgpu_sdma_ras_memory_id { @@ -103,13 +109,6 @@ struct amdgpu_sdma_ras { struct amdgpu_ras_block_object ras_block; }; -struct sdma_on_reset_funcs { - int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id); - int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id); - /* Linked list node to store this structure in a list; */ - struct list_head list; -}; - struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; @@ -131,6 +130,8 @@ struct amdgpu_sdma { uint32_t *ip_dump; uint32_t supported_reset; struct list_head reset_callback_list; + bool no_user_submission; + bool disable_uq; }; /* @@ -170,7 +171,6 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index e22cb2b5cd92..3939761be31c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -45,7 +45,11 @@ */ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) { - return AMDGPU_VA_RESERVED_SEQ64_START(adev); + u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev); + + addr = amdgpu_gmc_sign_extend(addr); + + return addr; } /** @@ -63,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; - u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -88,9 +92,11 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - seq64_addr = amdgpu_seq64_get_va_base(adev); + seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; + + va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - AMDGPU_PTE_READABLE); + va_flags); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); @@ -156,6 +162,7 @@ error: * * @adev: amdgpu_device pointer * @va: VA to access the seq in process address space + * @gpu_addr: GPU address to access the seq * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. @@ -163,7 +170,8 @@ error: * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, + u64 *gpu_addr, u64 **cpu_addr) { unsigned long bit_pos; @@ -172,7 +180,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) return -ENOSPC; __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + + if (gpu_addr) + *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr; + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; @@ -233,7 +246,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) */ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->seq64.sbo, NULL, + &adev->seq64.sbo, &adev->seq64.gpu_addr, (void **)&adev->seq64.cpu_base_addr); if (r) { dev_warn(adev->dev, "(%d) create seq64 failed\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 4203b2ab318d..26a249aaaee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -32,13 +32,14 @@ struct amdgpu_seq64 { struct amdgpu_bo *sbo; u32 num_sem; + u64 gpu_addr; u64 *cpu_base_addr; DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); }; void amdgpu_seq64_fini(struct amdgpu_device *adev); int amdgpu_seq64_init(struct amdgpu_device *adev); -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 5576ed0b508f..d6ae9974c952 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -249,9 +249,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - - /* TODO: Use DMA_RESV_USAGE_READ here */ - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { + /* Implicitly sync only to KERNEL, WRITE and READ */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 53b71e9d8076..9c5df35f05b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2081,6 +2081,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); + amdgpu_doorbell_fini(adev); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3d9e9fdc10b4..4a72c2bbd49e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -767,6 +767,7 @@ FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK); FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK); +FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version); static struct attribute *fw_attrs[] = { &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, @@ -781,7 +782,7 @@ static struct attribute *fw_attrs[] = { &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr, - NULL + &dev_attr_pldm_fw_version.attr, NULL }; #define to_dev_attr(x) container_of(x, struct device_attribute, attr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4eedd92f000b..9e89c3487be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -25,6 +25,8 @@ #include "amdgpu_socbb.h" +#define RS64_FW_UC_START_ADDR_LO 0x3000 + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ @@ -600,6 +602,7 @@ struct amdgpu_firmware { void *fw_buf_ptr; uint64_t fw_buf_mc; + uint32_t pldm_version; }; void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0a1ef95b2866..8c6e55b5b967 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -529,6 +529,7 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, pfns[i] = err_data.err_addr[i].retired_page; } ret = i; + adev->umc.err_addr_cnt = err_data.err_addr_cnt; out: kfree(err_data.err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 857693bcd8d4..29ce6b1d214a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -78,6 +78,18 @@ #define UMC_NPS_SHIFT 40 #define UMC_NPS_MASK 0xffULL +/* three column bits and one row bit in MCA address flip + * in bad page retirement + */ +#define RETIRE_FLIP_BITS_NUM 4 + +struct amdgpu_umc_flip_bits { + uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM]; + uint32_t flip_row_bit; + uint32_t r13_in_pa; + uint32_t bit_num; +}; + typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -100,6 +112,7 @@ struct amdgpu_umc_ras { bool dump_addr); uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev, uint64_t mca_addr, uint64_t retired_page); + void (*get_retire_flip_bits)(struct amdgpu_device *adev); }; struct amdgpu_umc_funcs { @@ -130,6 +143,10 @@ struct amdgpu_umc { /* active mask for umc node instance */ unsigned long active_mask; + + struct amdgpu_umc_flip_bits flip_bits; + + unsigned long err_addr_cnt; }; int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c new file mode 100644 index 000000000000..295e7186e156 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -0,0 +1,924 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_userq.h" +#include "amdgpu_userq_fence.h" + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) +{ + int i; + u32 userq_ip_mask = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { + if (adev->userq_funcs[i]) + userq_ip_mask |= (1 << i); + } + + return userq_ip_mask; +} + +static int +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { + r = userq_funcs->unmap(uq_mgr, queue); + if (r) + queue->state = AMDGPU_USERQ_STATE_HUNG; + else + queue->state = AMDGPU_USERQ_STATE_UNMAPPED; + } + return r; +} + +static int +amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { + r = userq_funcs->map(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_MAPPED; + } + } + return r; +} + +static void +amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct dma_fence *f = queue->last_fence; + int ret; + + if (f && !dma_fence_is_signaled(f)) { + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + } +} + +static void +amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + int queue_id) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + + uq_funcs->mqd_destroy(uq_mgr, queue); + amdgpu_userq_fence_driver_free(queue); + idr_remove(&uq_mgr->userq_idr, queue_id); + kfree(queue); +} + +int +amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + mutex_lock(&uq_mgr->userq_mutex); + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += queue->state == AMDGPU_USERQ_STATE_MAPPED; + + mutex_unlock(&uq_mgr->userq_mutex); + return ret; +} + +static struct amdgpu_usermode_queue * +amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) +{ + return idr_find(&uq_mgr->userq_idr, qid); +} + +void +amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + +retry: + /* Flush any pending resume work to create ev_fence */ + flush_delayed_work(&uq_mgr->resume_work); + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { + mutex_unlock(&uq_mgr->userq_mutex); + /* + * Looks like there was no pending resume work, + * add one now to create a valid eviction fence + */ + schedule_delayed_work(&uq_mgr->resume_work, 0); + goto retry; + } +} + +int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + bp.type = ttm_bo_type_kernel; + bp.size = size; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); + if (r) { + drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); + return r; + } + + r = amdgpu_bo_reserve(userq_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); + goto free_obj; + } + + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); + goto unresv; + } + + r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); + if (r) { + drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); + goto unresv; + } + + userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); + amdgpu_bo_unreserve(userq_obj->obj); + memset(userq_obj->cpu_ptr, 0, size); + return 0; + +unresv: + amdgpu_bo_unreserve(userq_obj->obj); + +free_obj: + amdgpu_bo_unref(&userq_obj->obj); + return r; +} + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj) +{ + amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unref(&userq_obj->obj); +} + +uint64_t +amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp) +{ + uint64_t index; + struct drm_gem_object *gobj; + struct amdgpu_userq_obj *db_obj = db_info->db_obj; + int r, db_size; + + gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); + if (gobj == NULL) { + drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n"); + return -EINVAL; + } + + db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + drm_gem_object_put(gobj); + + r = amdgpu_bo_reserve(db_obj->obj, true); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unref_bo; + } + + /* Pin the BO before generating the index, unpin in queue destroy */ + r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); + if (r) { + drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); + goto unresv_bo; + } + + switch (db_info->queue_type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_COMPUTE: + case AMDGPU_HW_IP_DMA: + db_size = sizeof(u64); + break; + + case AMDGPU_HW_IP_VCN_ENC: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; + break; + + case AMDGPU_HW_IP_VPE: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; + break; + + default: + drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n", + db_info->queue_type); + r = -EINVAL; + goto unpin_bo; + } + + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, + db_info->doorbell_offset, db_size); + drm_dbg_driver(adev_to_drm(uq_mgr->adev), + "[Usermode queues] doorbell index=%lld\n", index); + amdgpu_bo_unreserve(db_obj->obj); + return index; + +unpin_bo: + amdgpu_bo_unpin(db_obj->obj); +unresv_bo: + amdgpu_bo_unreserve(db_obj->obj); +unref_bo: + amdgpu_bo_unref(&db_obj->obj); + return r; +} + +static int +amdgpu_userq_destroy(struct drm_file *filp, int queue_id) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_usermode_queue *queue; + int r = 0; + + cancel_delayed_work_sync(&uq_mgr->resume_work); + mutex_lock(&uq_mgr->userq_mutex); + + queue = amdgpu_userq_find(uq_mgr, queue_id); + if (!queue) { + drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); + mutex_unlock(&uq_mgr->userq_mutex); + return -EINVAL; + } + amdgpu_userq_wait_for_last_fence(uq_mgr, queue); + r = amdgpu_bo_reserve(queue->db_obj.obj, true); + if (!r) { + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unreserve(queue->db_obj.obj); + } + amdgpu_bo_unref(&queue->db_obj.obj); + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + amdgpu_userq_cleanup(uq_mgr, queue, queue_id); + mutex_unlock(&uq_mgr->userq_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static int amdgpu_userq_priority_permit(struct drm_file *filp, + int priority) +{ + if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int +amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *uq_funcs; + struct amdgpu_usermode_queue *queue; + struct amdgpu_db_info db_info; + bool skip_map_queue; + uint64_t index; + int qid, r = 0; + int priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { + drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n", + args->in.ip_type); + return -EINVAL; + } + + r = amdgpu_userq_priority_permit(filp, priority); + if (r) + return r; + + if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && + (args->in.ip_type != AMDGPU_HW_IP_GFX) && + (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && + !amdgpu_is_tmz(adev)) { + drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n"); + return -EINVAL; + } + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + * + * This will also make sure we have a valid eviction fence ready to be used. + */ + mutex_lock(&adev->userq_mutex); + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + uq_funcs = adev->userq_funcs[args->in.ip_type]; + if (!uq_funcs) { + drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", + args->in.ip_type); + r = -EINVAL; + goto unlock; + } + + queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); + if (!queue) { + drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); + r = -ENOMEM; + goto unlock; + } + queue->doorbell_handle = args->in.doorbell_handle; + queue->queue_type = args->in.ip_type; + queue->vm = &fpriv->vm; + queue->priority = priority; + + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + + /* Convert relative doorbell offset into absolute doorbell index */ + index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); + if (index == (uint64_t)-EINVAL) { + drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); + kfree(queue); + goto unlock; + } + + queue->doorbell_index = index; + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); + goto unlock; + } + + r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to create Queue\n"); + amdgpu_userq_fence_driver_free(queue); + kfree(queue); + goto unlock; + } + + + qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); + if (qid < 0) { + drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + r = -ENOMEM; + goto unlock; + } + + /* don't map the queue if scheduling is halted */ + if (adev->userq_halt_for_enforce_isolation && + ((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) + skip_map_queue = true; + else + skip_map_queue = false; + if (!skip_map_queue) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) { + drm_file_err(uq_mgr->file, "Failed to map Queue\n"); + idr_remove(&uq_mgr->userq_idr, qid); + amdgpu_userq_fence_driver_free(queue); + uq_funcs->mqd_destroy(uq_mgr, queue); + kfree(queue); + goto unlock; + } + } + + + args->out.queue_id = qid; + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); + mutex_unlock(&adev->userq_mutex); + + return r; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | + AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) + return -EINVAL; + r = amdgpu_userq_create(filp, args); + if (r) + drm_file_err(filp, "Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: + if (args->in.ip_type || + args->in.doorbell_handle || + args->in.doorbell_offset || + args->in.flags || + args->in.queue_va || + args->in.queue_size || + args->in.rptr_va || + args->in.wptr_va || + args->in.wptr_va || + args->in.mqd || + args->in.mqd_size) + return -EINVAL; + r = amdgpu_userq_destroy(filp, args->in.queue_id); + if (r) + drm_file_err(filp, "Failed to destroy usermode queue\n"); + break; + + default: + drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); + return -EINVAL; + } + + return r; +} + +static int +amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Failed to map all the queues\n"); + return ret; +} + +static int +amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +static int +amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va *bo_va; + struct ww_acquire_ctx *ticket; + struct drm_exec exec; + struct amdgpu_bo *bo; + struct dma_resv *resv; + bool clear, unlock; + int ret = 0; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) { + drm_file_err(uq_mgr->file, "Failed to lock PD\n"); + goto unlock_all; + } + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } + } + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + + /* Per VM BOs never need to bo cleared in the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + goto unlock_all; + spin_lock(&vm->status_lock); + } + + ticket = &exec.ticket; + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.base.resv; + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + ret = amdgpu_userq_validate_vm_bo(NULL, bo); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BO\n"); + goto unlock_all; + } + + /* Try to reserve the BO to avoid clearing its ptes */ + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; + /* Somebody else is using the BO right now */ + } else { + clear = true; + unlock = false; + } + + ret = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); + if (ret) + goto unlock_all; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); + +unlock_all: + drm_exec_fini(&exec); + return ret; +} + +static void amdgpu_userq_restore_worker(struct work_struct *work) +{ + struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + int ret; + + flush_work(&fpriv->evf_mgr.suspend_work.work); + + mutex_lock(&uq_mgr->userq_mutex); + + ret = amdgpu_userq_validate_bos(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); + goto unlock; + } + + ret = amdgpu_userq_restore_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); + goto unlock; + } + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + +static int +amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0, r; + + /* Try to unmap all the queues in this process ctx */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uq_mgr, queue); + if (r) + ret = r; + } + + if (ret) + drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n"); + return ret; +} + +static int +amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id, ret; + + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + struct dma_fence *f = queue->last_fence; + + if (!f || dma_fence_is_signaled(f)) + continue; + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", + f->context, f->seqno); + return -ETIMEDOUT; + } + } + + return 0; +} + +void +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence) +{ + int ret; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + + /* Wait for any pending userqueue fence work to finish */ + ret = amdgpu_userq_wait_for_signal(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n"); + return; + } + + ret = amdgpu_userq_evict_all(uq_mgr); + if (ret) { + drm_file_err(uq_mgr->file, "Failed to evict userqueue\n"); + return; + } + + /* Signal current eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr, ev_fence); + + if (evf_mgr->fd_closing) { + cancel_delayed_work_sync(&uq_mgr->resume_work); + return; + } + + /* Schedule a resume work */ + schedule_delayed_work(&uq_mgr->resume_work, 0); +} + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev) +{ + mutex_init(&userq_mgr->userq_mutex); + idr_init_base(&userq_mgr->userq_idr, 1); + userq_mgr->adev = adev; + userq_mgr->file = file_priv; + + mutex_lock(&adev->userq_mutex); + list_add(&userq_mgr->list, &adev->userq_mgr_list); + mutex_unlock(&adev->userq_mutex); + + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + return 0; +} + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) +{ + struct amdgpu_device *adev = userq_mgr->adev; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + uint32_t queue_id; + + cancel_delayed_work_sync(&userq_mgr->resume_work); + + mutex_lock(&adev->userq_mutex); + mutex_lock(&userq_mgr->userq_mutex); + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { + amdgpu_userq_wait_for_last_fence(userq_mgr, queue); + amdgpu_userq_unmap_helper(userq_mgr, queue); + amdgpu_userq_cleanup(userq_mgr, queue, queue_id); + } + + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + if (uqm == userq_mgr) { + list_del(&uqm->list); + break; + } + } + idr_destroy(&userq_mgr->userq_idr); + mutex_unlock(&userq_mgr->userq_mutex); + mutex_unlock(&adev->userq_mutex); + mutex_destroy(&userq_mgr->userq_mutex); +} + +int amdgpu_userq_suspend(struct amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_resume(struct amdgpu_device *adev) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + if (!ip_mask) + return 0; + + mutex_lock(&adev->userq_mutex); + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already stopped!\n"); + adev->userq_halt_for_enforce_isolation = true; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + cancel_delayed_work_sync(&uqm->resume_work); + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_unmap_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} + +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx) +{ + u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + int queue_id; + int ret = 0, r; + + /* only need to stop gfx/compute */ + if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) + return 0; + + mutex_lock(&adev->userq_mutex); + if (!adev->userq_halt_for_enforce_isolation) + dev_warn(adev->dev, "userq scheduling already started!\n"); + adev->userq_halt_for_enforce_isolation = false; + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + mutex_lock(&uqm->userq_mutex); + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (((queue->queue_type == AMDGPU_HW_IP_GFX) || + (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && + (queue->xcp_id == idx)) { + r = amdgpu_userq_map_helper(uqm, queue); + if (r) + ret = r; + } + } + mutex_unlock(&uqm->userq_mutex); + } + mutex_unlock(&adev->userq_mutex); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h new file mode 100644 index 000000000000..ec040c2fd6c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_USERQ_H_ +#define AMDGPU_USERQ_H_ +#include "amdgpu_eviction_fence.h" + +#define AMDGPU_MAX_USERQ_COUNT 512 + +#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) +#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) +#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name) + +enum amdgpu_userq_state { + AMDGPU_USERQ_STATE_UNMAPPED = 0, + AMDGPU_USERQ_STATE_MAPPED, + AMDGPU_USERQ_STATE_PREEMPTED, + AMDGPU_USERQ_STATE_HUNG, +}; + +struct amdgpu_mqd_prop; + +struct amdgpu_userq_obj { + void *cpu_ptr; + uint64_t gpu_addr; + struct amdgpu_bo *obj; +}; + +struct amdgpu_usermode_queue { + int queue_type; + enum amdgpu_userq_state state; + uint64_t doorbell_handle; + uint64_t doorbell_index; + uint64_t flags; + struct amdgpu_mqd_prop *userq_prop; + struct amdgpu_userq_mgr *userq_mgr; + struct amdgpu_vm *vm; + struct amdgpu_userq_obj mqd; + struct amdgpu_userq_obj db_obj; + struct amdgpu_userq_obj fw_obj; + struct amdgpu_userq_obj wptr_obj; + struct xarray fence_drv_xa; + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *last_fence; + u32 xcp_id; + int priority; +}; + +struct amdgpu_userq_funcs { + int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args, + struct amdgpu_usermode_queue *queue); + void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *uq); + int (*unmap)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*map)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); +}; + +/* Usermode queues for gfx */ +struct amdgpu_userq_mgr { + struct idr userq_idr; + struct mutex userq_mutex; + struct amdgpu_device *adev; + struct delayed_work resume_work; + struct list_head list; + struct drm_file *file; +}; + +struct amdgpu_db_info { + uint64_t doorbell_handle; + uint32_t queue_type; + uint32_t doorbell_offset; + struct amdgpu_userq_obj *db_obj; +}; + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, + struct amdgpu_device *adev); + +void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); + +int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj, + int size); + +void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_userq_obj *userq_obj); + +void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence); + +int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr); + +void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr); + +uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp); + +u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev); + +int amdgpu_userq_suspend(struct amdgpu_device *adev); +int amdgpu_userq_resume(struct amdgpu_device *adev); + +int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); +int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, + u32 idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c new file mode 100644 index 000000000000..fc4d0d42e223 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -0,0 +1,968 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include + +#include +#include + +#include "amdgpu.h" +#include "amdgpu_userq_fence.h" + +static const struct dma_fence_ops amdgpu_userq_fence_ops; +static struct kmem_cache *amdgpu_userq_fence_slab; + +int amdgpu_userq_fence_slab_init(void) +{ + amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", + sizeof(struct amdgpu_userq_fence), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!amdgpu_userq_fence_slab) + return -ENOMEM; + + return 0; +} + +void amdgpu_userq_fence_slab_fini(void) +{ + rcu_barrier(); + kmem_cache_destroy(amdgpu_userq_fence_slab); +} + +static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) +{ + if (!f || f->ops != &amdgpu_userq_fence_ops) + return NULL; + + return container_of(f, struct amdgpu_userq_fence, base); +} + +static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) +{ + return le64_to_cpu(*fence_drv->cpu_addr); +} + +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long flags; + int r; + + fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL); + if (!fence_drv) + return -ENOMEM; + + /* Acquire seq64 memory */ + r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr, + &fence_drv->cpu_addr); + if (r) + goto free_fence_drv; + + memset(fence_drv->cpu_addr, 0, sizeof(u64)); + + kref_init(&fence_drv->refcount); + INIT_LIST_HEAD(&fence_drv->fences); + spin_lock_init(&fence_drv->fence_list_lock); + + fence_drv->adev = adev; + fence_drv->context = dma_fence_context_alloc(1); + get_task_comm(fence_drv->timeline_name, current); + + xa_lock_irqsave(&adev->userq_xa, flags); + r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index, + fence_drv, GFP_KERNEL)); + xa_unlock_irqrestore(&adev->userq_xa, flags); + if (r) + goto free_seq64; + + userq->fence_drv = fence_drv; + + return 0; + +free_seq64: + amdgpu_seq64_free(adev, fence_drv->va); +free_fence_drv: + kfree(fence_drv); + + return r; +} + +static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa) +{ + struct amdgpu_userq_fence_driver *fence_drv; + unsigned long index; + + if (xa_empty(xa)) + return; + + xa_lock(xa); + xa_for_each(xa, index, fence_drv) { + __xa_erase(xa, index); + amdgpu_userq_fence_driver_put(fence_drv); + } + + xa_unlock(xa); +} + +void +amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) +{ + amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); + xa_destroy(&userq->fence_drv_xa); + /* Drop the fence_drv reference held by user queue */ + amdgpu_userq_fence_driver_put(userq->fence_drv); +} + +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +{ + struct amdgpu_userq_fence *userq_fence, *tmp; + struct dma_fence *fence; + u64 rptr; + int i; + + if (!fence_drv) + return; + + rptr = amdgpu_userq_fence_read(fence_drv); + + spin_lock(&fence_drv->fence_list_lock); + list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { + fence = &userq_fence->base; + + if (rptr < fence->seqno) + break; + + dma_fence_signal(fence); + + for (i = 0; i < userq_fence->fence_drv_array_count; i++) + amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]); + + list_del(&userq_fence->link); + dma_fence_put(fence); + } + spin_unlock(&fence_drv->fence_list_lock); +} + +void amdgpu_userq_fence_driver_destroy(struct kref *ref) +{ + struct amdgpu_userq_fence_driver *fence_drv = container_of(ref, + struct amdgpu_userq_fence_driver, + refcount); + struct amdgpu_userq_fence_driver *xa_fence_drv; + struct amdgpu_device *adev = fence_drv->adev; + struct amdgpu_userq_fence *fence, *tmp; + struct xarray *xa = &adev->userq_xa; + unsigned long index, flags; + struct dma_fence *f; + + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) { + f = &fence->base; + + if (!dma_fence_is_signaled(f)) { + dma_fence_set_error(f, -ECANCELED); + dma_fence_signal(f); + } + + list_del(&fence->link); + dma_fence_put(f); + } + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, index, xa_fence_drv) + if (xa_fence_drv == fence_drv) + __xa_erase(xa, index); + xa_unlock_irqrestore(xa, flags); + + /* Free seq64 memory */ + amdgpu_seq64_free(adev, fence_drv->va); + kfree(fence_drv); +} + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_get(&fence_drv->refcount); +} + +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) +{ + kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); +} + +static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +{ + *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + return *userq_fence ? 0 : -ENOMEM; +} + +static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *userq_fence, + u64 seq, struct dma_fence **f) +{ + struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *fence; + unsigned long flags; + + fence_drv = userq->fence_drv; + if (!fence_drv) + return -EINVAL; + + spin_lock_init(&userq_fence->lock); + INIT_LIST_HEAD(&userq_fence->link); + fence = &userq_fence->base; + userq_fence->fence_drv = fence_drv; + + dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + fence_drv->context, seq); + + amdgpu_userq_fence_driver_get(fence_drv); + dma_fence_get(fence); + + if (!xa_empty(&userq->fence_drv_xa)) { + struct amdgpu_userq_fence_driver *stored_fence_drv; + unsigned long index, count = 0; + int i = 0; + + xa_lock(&userq->fence_drv_xa); + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) + count++; + + userq_fence->fence_drv_array = + kvmalloc_array(count, + sizeof(struct amdgpu_userq_fence_driver *), + GFP_ATOMIC); + + if (userq_fence->fence_drv_array) { + xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { + userq_fence->fence_drv_array[i] = stored_fence_drv; + __xa_erase(&userq->fence_drv_xa, index); + i++; + } + } + + userq_fence->fence_drv_array_count = i; + xa_unlock(&userq->fence_drv_xa); + } else { + userq_fence->fence_drv_array = NULL; + userq_fence->fence_drv_array_count = 0; + } + + /* Check if hardware has already processed the job */ + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + if (!dma_fence_is_signaled_locked(fence)) + list_add_tail(&userq_fence->link, &fence_drv->fences); + else + dma_fence_put(fence); + + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + + *f = fence; + + return 0; +} + +static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_userq_fence"; +} + +static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + + return fence->fence_drv->timeline_name; +} + +static bool amdgpu_userq_fence_signaled(struct dma_fence *f) +{ + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 rptr, wptr; + + rptr = amdgpu_userq_fence_read(fence_drv); + wptr = fence->base.seqno; + + if (rptr >= wptr) + return true; + + return false; +} + +static void amdgpu_userq_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu); + struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence); + struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv; + + /* Release the fence driver reference */ + amdgpu_userq_fence_driver_put(fence_drv); + + kvfree(userq_fence->fence_drv_array); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); +} + +static void amdgpu_userq_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_userq_fence_free); +} + +static const struct dma_fence_ops amdgpu_userq_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_userq_fence_get_driver_name, + .get_timeline_name = amdgpu_userq_fence_get_timeline_name, + .signaled = amdgpu_userq_fence_signaled, + .release = amdgpu_userq_fence_release, +}; + +/** + * amdgpu_userq_fence_read_wptr - Read the userq wptr value + * + * @queue: user mode queue structure pointer + * @wptr: write pointer value + * + * Read the wptr value from userq's MQD. The userq signal IOCTL + * creates a dma_fence for the shared buffers that expects the + * RPTR value written to seq64 memory >= WPTR. + * + * Returns wptr value on success, error on failure. + */ +static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue, + u64 *wptr) +{ + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + u64 addr, *ptr; + int r; + + r = amdgpu_bo_reserve(queue->vm->root.bo, false); + if (r) + return r; + + addr = queue->userq_prop->wptr_gpu_addr; + addr &= AMDGPU_GMC_HOLE_MASK; + + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + amdgpu_bo_unreserve(queue->vm->root.bo); + DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); + return -EINVAL; + } + + bo = amdgpu_bo_ref(mapping->bo_va->base.bo); + amdgpu_bo_unreserve(queue->vm->root.bo); + r = amdgpu_bo_reserve(bo, true); + if (r) { + DRM_ERROR("Failed to reserve userqueue wptr bo"); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&ptr); + if (r) { + DRM_ERROR("Failed mapping the userqueue wptr bo"); + goto map_error; + } + + *wptr = le64_to_cpu(*ptr); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return 0; + +map_error: + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return r; +} + +static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) +{ + dma_fence_put(fence); +} + +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_amdgpu_userq_signal *args = data; + struct drm_gem_object **gobj_write = NULL; + struct drm_gem_object **gobj_read = NULL; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *userq_fence; + struct drm_syncobj **syncobj = NULL; + u32 *bo_handles_write, num_write_bo_handles; + u32 *syncobj_handles, num_syncobj_handles; + u32 *bo_handles_read, num_read_bo_handles; + int r, i, entry, rentry, wentry; + struct dma_fence *fence; + struct drm_exec exec; + u64 wptr; + + num_syncobj_handles = args->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), + sizeof(u32) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + /* Array of pointers to the looked up syncobjs */ + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + if (!syncobj) { + r = -ENOMEM; + goto free_syncobj_handles; + } + + for (entry = 0; entry < num_syncobj_handles; entry++) { + syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]); + if (!syncobj[entry]) { + r = -ENOENT; + goto free_syncobj; + } + } + + num_read_bo_handles = args->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) { + r = PTR_ERR(bo_handles_read); + goto free_syncobj; + } + + /* Array of pointers to the GEM read objects */ + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_bo_handles_read; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + num_write_bo_handles = args->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto put_gobj_read; + } + + /* Array of pointers to the GEM write objects */ + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto free_bo_handles_write; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + /* Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + goto put_gobj_write; + } + + r = amdgpu_userq_fence_read_wptr(queue, &wptr); + if (r) + goto put_gobj_write; + + r = amdgpu_userq_fence_alloc(&userq_fence); + if (r) + goto put_gobj_write; + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + goto put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + amdgpu_userq_fence_cleanup(fence); + goto exec_fini; + } + } + + for (i = 0; i < num_read_bo_handles; i++) { + if (!gobj_read || !gobj_read[i]->resv) + continue; + + dma_resv_add_fence(gobj_read[i]->resv, fence, + DMA_RESV_USAGE_READ); + } + + for (i = 0; i < num_write_bo_handles; i++) { + if (!gobj_write || !gobj_write[i]->resv) + continue; + + dma_resv_add_fence(gobj_write[i]->resv, fence, + DMA_RESV_USAGE_WRITE); + } + + /* Add the created fence to syncobj/BO's */ + for (i = 0; i < num_syncobj_handles; i++) + drm_syncobj_replace_fence(syncobj[i], fence); + + /* drop the reference acquired in fence creation function */ + dma_fence_put(fence); + +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +free_bo_handles_write: + kfree(bo_handles_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_bo_handles_read: + kfree(bo_handles_read); +free_syncobj: + while (entry-- > 0) + if (syncobj[entry]) + drm_syncobj_put(syncobj[entry]); + kfree(syncobj); +free_syncobj_handles: + kfree(syncobj_handles); + + return r; +} + +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write; + u32 num_syncobj, num_read_bo_handles, num_write_bo_handles; + struct drm_amdgpu_userq_fence_info *fence_info = NULL; + struct drm_amdgpu_userq_wait *wait_info = data; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct amdgpu_usermode_queue *waitq; + struct drm_gem_object **gobj_write; + struct drm_gem_object **gobj_read; + struct dma_fence **fences = NULL; + u16 num_points, num_fences = 0; + int r, i, rentry, wentry, cnt; + struct drm_exec exec; + + num_read_bo_handles = wait_info->num_bo_read_handles; + bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), + sizeof(u32) * num_read_bo_handles); + if (IS_ERR(bo_handles_read)) + return PTR_ERR(bo_handles_read); + + num_write_bo_handles = wait_info->num_bo_write_handles; + bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles), + sizeof(u32) * num_write_bo_handles); + if (IS_ERR(bo_handles_write)) { + r = PTR_ERR(bo_handles_write); + goto free_bo_handles_read; + } + + num_syncobj = wait_info->num_syncobj_handles; + syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles), + sizeof(u32) * num_syncobj); + if (IS_ERR(syncobj_handles)) { + r = PTR_ERR(syncobj_handles); + goto free_bo_handles_write; + } + + num_points = wait_info->num_syncobj_timeline_handles; + timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles), + sizeof(u32) * num_points); + if (IS_ERR(timeline_handles)) { + r = PTR_ERR(timeline_handles); + goto free_syncobj_handles; + } + + timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points), + sizeof(u32) * num_points); + if (IS_ERR(timeline_points)) { + r = PTR_ERR(timeline_points); + goto free_timeline_handles; + } + + gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL); + if (!gobj_read) { + r = -ENOMEM; + goto free_timeline_points; + } + + for (rentry = 0; rentry < num_read_bo_handles; rentry++) { + gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]); + if (!gobj_read[rentry]) { + r = -ENOENT; + goto put_gobj_read; + } + } + + gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL); + if (!gobj_write) { + r = -ENOMEM; + goto put_gobj_read; + } + + for (wentry = 0; wentry < num_write_bo_handles; wentry++) { + gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]); + if (!gobj_write[wentry]) { + r = -ENOENT; + goto put_gobj_write; + } + } + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, + (num_read_bo_handles + num_write_bo_handles)); + + /* Lock all BOs with retry handling */ + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + + r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + drm_exec_retry_on_contention(&exec); + if (r) { + drm_exec_fini(&exec); + goto put_gobj_write; + } + } + + if (!wait_info->num_fences) { + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + dma_fence_unwrap_for_each(f, &iter, fence) + num_fences++; + + dma_fence_put(fence); + } + } + + /* Count syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto exec_fini; + + num_fences++; + dma_fence_put(fence); + } + + /* Count GEM objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) + num_fences++; + } + + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) + num_fences++; + } + + /* + * Passing num_fences = 0 means that userspace doesn't want to + * retrieve userq_fence_info. If num_fences = 0 we skip filling + * userq_fence_info and return the actual number of fences on + * args->num_fences. + */ + wait_info->num_fences = num_fences; + } else { + /* Array of fence info */ + fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL); + if (!fence_info) { + r = -ENOMEM; + goto exec_fini; + } + + /* Array of fences */ + fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) { + r = -ENOMEM; + goto free_fence_info; + } + + /* Retrieve GEM read objects fence */ + for (i = 0; i < num_read_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, + DMA_RESV_USAGE_READ, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + /* Retrieve GEM write objects fence */ + for (i = 0; i < num_write_bo_handles; i++) { + struct dma_resv_iter resv_cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, + DMA_RESV_USAGE_WRITE, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + dma_fence_get(fence); + } + } + + if (num_points) { + struct dma_fence_unwrap iter; + struct dma_fence *fence; + struct dma_fence *f; + + for (i = 0; i < num_points; i++) { + r = drm_syncobj_find_fence(filp, timeline_handles[i], + timeline_points[i], + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + dma_fence_unwrap_for_each(f, &iter, fence) { + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + dma_fence_get(f); + fences[num_fences++] = f; + } + + dma_fence_put(fence); + } + } + + /* Retrieve syncobj's fence */ + for (i = 0; i < num_syncobj; i++) { + struct dma_fence *fence; + + r = drm_syncobj_find_fence(filp, syncobj_handles[i], + 0, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, + &fence); + if (r) + goto free_fences; + + if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) { + r = -EINVAL; + goto free_fences; + } + + fences[num_fences++] = fence; + } + + /* + * Keep only the latest fences to reduce the number of values + * given back to userspace. + */ + num_fences = dma_fence_dedup_array(fences, num_fences); + + waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id); + if (!waitq) { + r = -EINVAL; + goto free_fences; + } + + for (i = 0, cnt = 0; i < num_fences; i++) { + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence *userq_fence; + u32 index; + + userq_fence = to_amdgpu_userq_fence(fences[i]); + if (!userq_fence) { + /* + * Just waiting on other driver fences should + * be good for now + */ + r = dma_fence_wait(fences[i], true); + if (r) { + dma_fence_put(fences[i]); + goto free_fences; + } + + dma_fence_put(fences[i]); + continue; + } + + fence_drv = userq_fence->fence_drv; + /* + * We need to make sure the user queue release their reference + * to the fence drivers at some point before queue destruction. + * Otherwise, we would gather those references until we don't + * have any more space left and crash. + */ + r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, + xa_limit_32b, GFP_KERNEL); + if (r) + goto free_fences; + + amdgpu_userq_fence_driver_get(fence_drv); + + /* Store drm syncobj's gpu va address and value */ + fence_info[cnt].va = fence_drv->va; + fence_info[cnt].value = fences[i]->seqno; + + dma_fence_put(fences[i]); + /* Increment the actual userq fence count */ + cnt++; + } + + wait_info->num_fences = cnt; + /* Copy userq fence info to user space */ + if (copy_to_user(u64_to_user_ptr(wait_info->out_fences), + fence_info, wait_info->num_fences * sizeof(*fence_info))) { + r = -EFAULT; + goto free_fences; + } + + kfree(fences); + kfree(fence_info); + } + + drm_exec_fini(&exec); + for (i = 0; i < num_read_bo_handles; i++) + drm_gem_object_put(gobj_read[i]); + kfree(gobj_read); + + for (i = 0; i < num_write_bo_handles; i++) + drm_gem_object_put(gobj_write[i]); + kfree(gobj_write); + + kfree(timeline_points); + kfree(timeline_handles); + kfree(syncobj_handles); + kfree(bo_handles_write); + kfree(bo_handles_read); + + return 0; + +free_fences: + while (num_fences-- > 0) + dma_fence_put(fences[num_fences]); + kfree(fences); +free_fence_info: + kfree(fence_info); +exec_fini: + drm_exec_fini(&exec); +put_gobj_write: + while (wentry-- > 0) + drm_gem_object_put(gobj_write[wentry]); + kfree(gobj_write); +put_gobj_read: + while (rentry-- > 0) + drm_gem_object_put(gobj_read[rentry]); + kfree(gobj_read); +free_timeline_points: + kfree(timeline_points); +free_timeline_handles: + kfree(timeline_handles); +free_syncobj_handles: + kfree(syncobj_handles); +free_bo_handles_write: + kfree(bo_handles_write); +free_bo_handles_read: + kfree(bo_handles_read); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h new file mode 100644 index 000000000000..97a125ab8a78 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_USERQ_FENCE_H__ +#define __AMDGPU_USERQ_FENCE_H__ + +#include + +#include "amdgpu_userq.h" + +struct amdgpu_userq_fence { + struct dma_fence base; + /* + * This lock is necessary to synchronize the + * userqueue dma fence operations. + */ + spinlock_t lock; + struct list_head link; + unsigned long fence_drv_array_count; + struct amdgpu_userq_fence_driver *fence_drv; + struct amdgpu_userq_fence_driver **fence_drv_array; +}; + +struct amdgpu_userq_fence_driver { + struct kref refcount; + u64 va; + u64 gpu_addr; + u64 *cpu_addr; + u64 context; + /* + * This lock is necesaary to synchronize the access + * to the fences list by the fence driver. + */ + spinlock_t fence_list_lock; + struct list_head fences; + struct amdgpu_device *adev; + char timeline_name[TASK_COMM_LEN]; +}; + +int amdgpu_userq_fence_slab_init(void); +void amdgpu_userq_fence_slab_fini(void); + +void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_destroy(struct kref *ref); +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1991dd3d1056..c8885c3d54b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -353,9 +353,9 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub will corrupt VCPU buffer, so we need to + /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr) + if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) return 0; return amdgpu_vcn_save_vcpu_bo_inst(adev, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0bb8cbe0dcc0..13f0cdeb59c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1323,6 +1323,9 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo { struct amdgpu_virt *virt = &adev->virt; + if (!virt->ops || !virt->ops->req_ras_err_count) + return -EOPNOTSUPP; + /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host * will ignore incoming guest messages. Ratelimit the guest messages to * prevent guest self DOS. @@ -1378,14 +1381,16 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, used_size = host_telemetry->header.used_size; if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) - return 0; + return -EINVAL; cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); if (!cper_dump) return -ENOMEM; - if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) { + ret = -EINVAL; goto out; + } *more = cper_dump->more; @@ -1425,7 +1430,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) int ret = 0; uint32_t more = 0; - if (!amdgpu_sriov_ras_cper_en(adev)) + if (!virt->ops || !virt->ops->req_ras_cper_dump) return -EOPNOTSUPP; do { @@ -1434,7 +1439,7 @@ static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) adev, virt->fw_reserve.ras_telemetry, &more); else ret = 0; - } while (more); + } while (more && !ret); return ret; } @@ -1444,6 +1449,9 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) struct amdgpu_virt *virt = &adev->virt; int ret = 0; + if (!amdgpu_sriov_ras_cper_en(adev)) + return -EOPNOTSUPP; + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) && down_read_trylock(&adev->reset_domain->sem)) { mutex_lock(&virt->ras.ras_telemetry_mutex); @@ -1480,3 +1488,16 @@ bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, return true; } + +/* + * amdgpu_virt_request_bad_pages() - request bad pages + * @adev: amdgpu device. + * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region + */ +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->req_bad_pages) + virt->ops->req_bad_pages(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index df03dba67ab8..577c6194db78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -97,6 +97,7 @@ struct amdgpu_virt_ops { bool (*rcvd_ras_intr)(struct amdgpu_device *adev); int (*req_ras_err_count)(struct amdgpu_device *adev); int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); + int (*req_bad_pages)(struct amdgpu_device *adev); }; /* @@ -146,11 +147,13 @@ enum AMDGIM_FEATURE_FLAG { enum AMDGIM_REG_ACCESS_FLAG { /* Use PSP to program IH_RB_CNTL */ - AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), + AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0), /* Use RLC to program MMHUB regs */ - AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), + AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ - AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), + /* Use PSP to program L1_TLB_CNTL*/ + AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), }; struct amdgim_pf2vf_info_v1 { @@ -260,7 +263,10 @@ struct amdgpu_virt { uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; + struct work_struct flr_work; + struct work_struct bad_pages_work; + struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; @@ -330,6 +336,10 @@ struct amdgpu_video_codec_info; (amdgpu_sriov_vf((adev)) && \ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) +#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN))) + #define amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) @@ -423,4 +433,5 @@ int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); +void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e94..3911c78f8282 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -787,7 +787,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && ring->funcs->emit_cleaner_shader && job->base.s_fence && &job->base.s_fence->scheduled == isolation->spearhead; @@ -817,7 +818,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 23b6f7a4aa4a..b03c3895897b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -709,10 +709,10 @@ void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev) struct amdgpu_xcp_cfg *xcp_cfg; int i; - if (!adev->xcp_mgr) + if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg) return; - xcp_cfg = adev->xcp_mgr->xcp_cfg; + xcp_cfg = adev->xcp_mgr->xcp_cfg; for (i = 0; i < xcp_cfg->num_res; i++) { xcp_res = &xcp_cfg->xcp_res[i]; kobject_put(&xcp_res->kobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 477424472bbe..f51ef4cf16e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -296,15 +296,27 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) { - const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; - const int xgmi_inst = 2; - u32 link_inst; + const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 }; + const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070, + 0x11b00070 }; + u32 i, n; u64 addr; - link_inst = global_link_num % xgmi_inst; + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1); + addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n]; + break; + case IP_VERSION(6, 4, 1): + n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1); + addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n]; + break; + default: + return U32_MAX; + } - addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + - adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + i = global_link_num / n; + addr += adev->asic_funcs->encode_ext_smn_addressing(i); return RREG32_PCIE_EXT(addr); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index d6ac2652f0ac..92ca13097aaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -109,10 +109,11 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_mmhub : 1; + uint32_t vf_reg_access_gc : 1; + uint32_t vf_reg_access_l1_tlb_cntl : 1; + uint32_t reserved : 28; } flags; uint32_t all; }; @@ -330,6 +331,7 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_RAS_POISON = 202, MB_REQ_RAS_ERROR_COUNT = 203, MB_REQ_RAS_CPER_DUMP = 204, + MB_REQ_RAS_BAD_PAGES = 205, }; /* mailbox message send from host to guest */ @@ -347,6 +349,9 @@ enum amd_sriov_mailbox_response_message { MB_RES_MSG_GPU_RMA = 10, MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, MB_REQ_RAS_CPER_DUMP_READY = 14, + MB_RES_MSG_RAS_BAD_PAGES_READY = 15, + MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16, + MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17, MB_RES_MSG_TEXT_MESSAGE = 255 }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index ae071985f26e..1c083304ae77 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -448,6 +448,49 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x return 0; } +static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, + int px_mode, int *num_xcp, + uint16_t *nps_modes) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + + if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode))) + return -EINVAL; + + switch (px_mode) { + case AMDGPU_SPX_PARTITION_MODE: + *num_xcp = 1; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); + break; + case AMDGPU_DPX_PARTITION_MODE: + *num_xcp = 2; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS2_PARTITION_MODE); + break; + case AMDGPU_TPX_PARTITION_MODE: + *num_xcp = 3; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + case AMDGPU_QPX_PARTITION_MODE: + *num_xcp = 4; + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + case AMDGPU_CPX_PARTITION_MODE: + *num_xcp = NUM_XCC(adev->gfx.xcc_mask); + *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + if (amdgpu_sriov_vf(adev)) + *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); + break; + default: + return -EINVAL; + } + + return 0; +} + static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, int mode, struct amdgpu_xcp_cfg *xcp_cfg) @@ -455,7 +498,7 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, struct amdgpu_device *adev = xcp_mgr->adev; int max_res[AMDGPU_XCP_RES_MAX] = {}; bool res_lt_xcp; - int num_xcp, i; + int num_xcp, i, r; u16 nps_modes; if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) @@ -466,34 +509,9 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst; max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst; - switch (mode) { - case AMDGPU_SPX_PARTITION_MODE: - num_xcp = 1; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); - break; - case AMDGPU_DPX_PARTITION_MODE: - num_xcp = 2; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS2_PARTITION_MODE); - break; - case AMDGPU_TPX_PARTITION_MODE: - num_xcp = 3; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); - break; - case AMDGPU_QPX_PARTITION_MODE: - num_xcp = 4; - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); - break; - case AMDGPU_CPX_PARTITION_MODE: - num_xcp = NUM_XCC(adev->gfx.xcc_mask); - nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); - break; - default: - return -EINVAL; - } + r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes); + if (r) + return r; xcp_cfg->compatible_nps_modes = (adev->gmc.supported_nps_modes & nps_modes); @@ -543,30 +561,31 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, enum amdgpu_gfx_partition mode) { struct amdgpu_device *adev = xcp_mgr->adev; - int num_xcc, num_xccs_per_xcp; + int num_xcc, num_xccs_per_xcp, r; + int num_xcp, nps_mode; + u16 supp_nps_modes; + bool comp_mode; + nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, + &supp_nps_modes); + if (r) + return false; + + comp_mode = !!(BIT(nps_mode) & supp_nps_modes); num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; + return comp_mode && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; + return comp_mode && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: - return (adev->gmc.num_mem_partitions == 1 || - adev->gmc.num_mem_partitions == 3) && - ((num_xcc % 3) == 0); + return comp_mode && ((num_xcc % 3) == 0); case AMDGPU_QPX_PARTITION_MODE: num_xccs_per_xcp = num_xcc / 4; - return (adev->gmc.num_mem_partitions == 1 || - adev->gmc.num_mem_partitions == 4) && - (num_xccs_per_xcp >= 2); + return comp_mode && (num_xccs_per_xcp >= 2); case AMDGPU_CPX_PARTITION_MODE: - /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX. - * (num_xcc % adev->gmc.num_mem_partitions) == 0 because - * num_compute_partitions can't be less than num_mem_partitions - */ - return ((num_xcc > 1) && - (num_xcc % adev->gmc.num_mem_partitions) == 0); + return comp_mode && (num_xcc > 1); default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 81d195d366ce..427b073de2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1444,6 +1444,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx) if (vbios_str == NULL) vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1; } + OPTIMIZER_HIDE_VAR(vbios_str); if (vbios_str != NULL && *vbios_str == 0) vbios_str++; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 521b9faab180..492813ab1b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -458,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect u8 link_status[DP_LINK_STATUS_SIZE]; struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv; - if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status) - <= 0) + if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, + link_status) < 0) return false; if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count)) return false; @@ -616,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, - dp_info->link_status) <= 0) { + dp_info->link_status) < 0) { DRM_ERROR("displayport link status failed\n"); break; } @@ -681,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd); if (drm_dp_dpcd_read_link_status(dp_info->aux, - dp_info->link_status) <= 0) { + dp_info->link_status) < 0) { DRM_ERROR("displayport link status failed\n"); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 508cea965983..9e8715b4739d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -56,6 +56,8 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); +u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); @@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin"); -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); - - static void cik_sdma_free_microcode(struct amdgpu_device *adev) { int i; @@ -993,14 +992,9 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block) static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block) { - int r; struct amdgpu_device *adev = ip_block->adev; - r = cik_sdma_start(adev); - if (r) - return r; - - return r; + return cik_sdma_start(adev); } static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1040,14 +1034,10 @@ static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block) static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | - SRBM_STATUS2__SDMA1_BUSY_MASK); - - if (!tmp) + if (cik_sdma_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 279288365940..8aca4f2734f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -60,9 +60,6 @@ #define AUD5_REGISTER_OFFSET (0x179d - 0x1780) #define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) -#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 -#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index df401aded662..bf7c22f81cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3075,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3227,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 80f01c3989cd..47e05783c4a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3206,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } @@ -3358,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3488,8 +3488,7 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .set_powergating_state = dce_v11_0_set_powergating_state, }; -static void -dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 255c70959343..276c025c4c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -287,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -412,7 +412,7 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, { if (!render) WREG32(mmVGA_RENDER_CONTROL, - RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL); + RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK); } static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) @@ -1011,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* select wm A */ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); tmp = arb_control3; - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(1); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT))); /* select wm B */ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset); - tmp &= ~LATENCY_WATERMARK_MASK(3); - tmp |= LATENCY_WATERMARK_MASK(2); + tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); + tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT); WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp); WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) | @@ -1089,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, } WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset, - DC_LB_MEMORY_CONFIG(tmp)); + (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT)); WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT)); @@ -1306,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 offset; struct drm_connector *connector; struct drm_connector_list_iter iter; struct amdgpu_connector *amdgpu_connector = NULL; @@ -1327,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, }; + if (!dig || !dig->afmt || !dig->afmt->pin) + return; + + offset = dig->afmt->pin->offset; + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { @@ -1348,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) return; for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; + u32 value = 0; u8 stereo_freqs = 0; int max_channels = -1; int j; @@ -1358,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) if (sad->format == eld_reg_to_type[i][1]) { if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); + value = (sad->channels << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) | + (sad->byte2 << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) | + (sad->freq << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT); max_channels = sad->channels; } @@ -1374,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) } } - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + value |= (stereo_freqs << + AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT); + + WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value); } kfree(sads); - } static void dce_v6_0_audio_enable(struct amdgpu_device *adev, @@ -1886,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_bo *abo; uint64_t fb_location, tiling_flags; uint32_t fb_format, fb_pitch_pixels, pipe_config; - u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE); + u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -1926,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, switch (target_fb->format->format) { case DRM_FORMAT_C8: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) | - GRPH_FORMAT(GRPH_FORMAT_INDEXED)); + fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); break; case DRM_FORMAT_XRGB4444: case DRM_FORMAT_ARGB4444: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB4444)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB1555: case DRM_FORMAT_ARGB1555: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB1555)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_BGRX5551: case DRM_FORMAT_BGRA5551: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA5551)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_RGB565: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB565)); + fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16); + fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB8888: case DRM_FORMAT_ARGB8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; case DRM_FORMAT_XRGB2101010: case DRM_FORMAT_ARGB2101010: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB2101010)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_BGRX1010102: case DRM_FORMAT_BGRA1010102: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_BGRA1010102)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | - GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); - fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | - GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN - fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); + fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif break; default: @@ -2013,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - fb_format |= GRPH_NUM_BANKS(num_banks); - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1); - fb_format |= GRPH_TILE_SPLIT(tile_split); - fb_format |= GRPH_BANK_WIDTH(bankw); - fb_format |= GRPH_BANK_HEIGHT(bankh); - fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect); + fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT); + fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); + fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT); + fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT); + fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT); + fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT); } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1); + fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT); } pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - fb_format |= GRPH_PIPE_CONFIG(pipe_config); + fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT); dce_v6_0_vga_enable(crtc, false); @@ -2040,7 +2046,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); + (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); @@ -2108,14 +2114,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc, if (mode->flags & DRM_MODE_FLAG_INTERLACE) WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, - INTERLEAVE_EN); + DATA_FORMAT__INTERLEAVE_EN_MASK); else WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0); } static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -2125,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | - (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); + ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) | + (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT))); WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK); WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK); WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | - (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); + ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) | + (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT))); WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); @@ -2160,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | - (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | - ICON_DEGAMMA_MODE(0) | - (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); + ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) | + (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT))); WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | - (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); + ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) | + (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT))); WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | - (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); + ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) | + (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT))); WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, - ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | - (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); + ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) | + (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT))); /* XXX match this to the depth of the crtc fmt block, move to modeset? */ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0); @@ -2267,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc) WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - - } static void dce_v6_0_show_cursor(struct drm_crtc *crtc) @@ -2285,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc) CUR_CONTROL__CURSOR_EN_MASK | (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) | (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT)); - } static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc, @@ -2596,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -2669,7 +2670,7 @@ static void dce_v6_0_panic_flush(struct drm_plane *plane) /* Disable DC tiling */ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_ARRAY_MODE(0x7); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); } @@ -2745,7 +2746,6 @@ static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - bool ret; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { @@ -2789,8 +2789,7 @@ static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - ret = amdgpu_atombios_get_connector_info_from_object_table(adev); - if (ret) + if (amdgpu_atombios_get_connector_info_from_object_table(adev)) amdgpu_display_print_display_setup(adev_to_drm(adev)); else return -EINVAL; @@ -2986,12 +2985,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask &= ~VBLANK_INT_MASK; + interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; case AMDGPU_IRQ_STATE_ENABLE: interrupt_mask = RREG32(mmINT_MASK + reg_block); - interrupt_mask |= VBLANK_INT_MASK; + interrupt_mask |= INT_MASK__VBLANK_INT_MASK; WREG32(mmINT_MASK + reg_block, interrupt_mask); break; default: @@ -3006,28 +3005,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } -static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned hpd, enum amdgpu_interrupt_state state) { u32 dc_hpd_int_cntl; - if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hpd %d\n", hpd); return 0; } switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl &= ~DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]); - dc_hpd_int_cntl |= DC_HPDx_INT_EN; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl); + dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl); break; default: break; @@ -3036,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3096,7 +3095,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) - WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK); + WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3107,7 +3106,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, break; case 1: /* vline */ if (disp_int & interrupt_status_offsets[crtc].vline) - WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK); + WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK); else DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); @@ -3121,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3172,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev, spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " "AMDGPU_FLIP_SUBMITTED(%d)\n", amdgpu_crtc->pflip_status, @@ -3249,12 +3248,10 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .set_powergating_state = dce_v6_0_set_powergating_state, }; -static void -dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, +static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3274,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) { - struct amdgpu_device *adev = drm_to_adev(encoder->dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); @@ -3314,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder) static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -3325,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder) static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) { - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); @@ -3541,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { - .set = dce_v6_0_set_crtc_interrupt_state, + .set = dce_v6_0_set_crtc_irq_state, .process = dce_v6_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = { - .set = dce_v6_0_set_pageflip_interrupt_state, + .set = dce_v6_0_set_pageflip_irq_state, .process = dce_v6_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = { - .set = dce_v6_0_set_hpd_interrupt_state, + .set = dce_v6_0_set_hpd_irq_state, .process = dce_v6_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 07358546581f..e62ccf9eb73d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, u32 tmp; if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", hpd); + DRM_DEBUG("invalid hpd %d\n", hpd); return; } @@ -3021,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, } } -static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3029,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, u32 dc_hpd_int_cntl; if (type >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hdp %d\n", type); + DRM_DEBUG("invalid hpd %d\n", type); return 0; } @@ -3051,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3136,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, return 0; } -static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev, +static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, enum amdgpu_interrupt_state state) @@ -3547,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { - .set = dce_v8_0_set_crtc_interrupt_state, + .set = dce_v8_0_set_crtc_irq_state, .process = dce_v8_0_crtc_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = { - .set = dce_v8_0_set_pageflip_interrupt_state, + .set = dce_v8_0_set_pageflip_irq_state, .process = dce_v8_0_pageflip_irq, }; static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = { - .set = dce_v8_0_set_hpd_interrupt_state, + .set = dce_v8_0_set_hpd_irq_state, .process = dce_v8_0_hpd_irq, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 23e6a05359c2..75ea071744eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), @@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { @@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), + /* gfx header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_10_1[] = { @@ -4296,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -4306,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) @@ -4752,6 +4755,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int i, j, k, r, ring_id = 0; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -4763,7 +4767,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -4778,7 +4782,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -4800,7 +4804,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { @@ -4810,7 +4814,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } break; case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; @@ -4826,6 +4832,34 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(10, 3, 6): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 14 && + adev->gfx.pfp_fw_version >= 17 && + adev->gfx.mec_fw_version >= 24) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 7): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 4 && + adev->gfx.pfp_fw_version >= 9 && + adev->gfx.mec_fw_version >= 12) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -4886,7 +4920,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) /* set up the gfx ring */ for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (j = 0; j < num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) continue; @@ -9645,9 +9679,14 @@ static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_10[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -9708,9 +9747,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block) nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_10[reg])); + if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); } index += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2a5c2a1ae3c7..afd6d59164bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,6 +48,8 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 #define GFX11_MEC_HPD_SIZE 2048 @@ -177,9 +179,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -230,7 +236,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { @@ -259,7 +274,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_11_0[] = { @@ -580,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; - - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -633,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -833,9 +848,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; int ctx_reg_offset; if (adev->gfx.rlc.cs_data == NULL) @@ -843,39 +856,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - ctx_reg_offset = - SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(ctx_reg_offset); buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) @@ -1056,14 +1045,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, #define MQD_FWWORKAREA_SIZE 484 #define MQD_FWWORKAREA_ALIGNMENT 256 -static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, +static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, struct amdgpu_gfx_shadow_info *shadow_info) { - if (adev->gfx.cp_gfx_shadow) { - shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; - shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; - shadow_info->csa_size = MQD_FWWORKAREA_SIZE; - shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); return 0; } else { memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); @@ -1136,6 +1132,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; + if (adev->gfx.disable_kq) { + ring->no_scheduler = true; + ring->no_user_submission = true; + } if (!ring_id) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; @@ -1568,24 +1568,18 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - int i, j, k, r, ring_id = 0; + int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe_per_mec = 4; - adev->gfx.mec.num_queue_per_pipe = 4; - break; - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): @@ -1593,7 +1587,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 2; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1608,6 +1602,35 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2390 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2600 && + adev->mes.fw_version[0] >= 120) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + /* add firmware version checks here */ + if (0 && !adev->gfx.disable_uq) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): @@ -1640,6 +1663,34 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 2): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 12 && + adev->gfx.pfp_fw_version >= 15 && + adev->gfx.mec_fw_version >= 15) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(11, 5, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 7 && + adev->gfx.pfp_fw_version >= 8 && + adev->gfx.mec_fw_version >= 8) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1701,37 +1752,42 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; + if (adev->gfx.num_gfx_rings) { + ring_id = 0; + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; - r = gfx_v11_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + r = gfx_v11_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, - k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) + continue; - r = gfx_v11_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v11_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -4061,6 +4117,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4081,6 +4139,16 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4205,6 +4273,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -4256,6 +4326,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -4509,11 +4583,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) return r; } - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->gfx.disable_kq) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we don't want to set ring->ready */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + } + if (amdgpu_async_gfx_ring) + amdgpu_gfx_disable_kgq(adev, 0); + } else { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } for (i = 0; i < adev->gfx.num_compute_rings; i++) { @@ -4722,6 +4808,49 @@ static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -4731,9 +4860,11 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v11_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { - if (amdgpu_async_gfx_ring) { + if (amdgpu_async_gfx_ring && + !adev->gfx.disable_kq) { if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -5059,11 +5190,36 @@ static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + /* We need one GFX ring temporarily to set up + * the clear state. + */ + adev->gfx.num_gfx_rings = 1; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v11_0_set_kiq_pm4_funcs(adev); gfx_v11_0_set_ring_funcs(adev); @@ -5094,6 +5250,11 @@ static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); if (r) return r; + + r = gfx_v11_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -5691,10 +5852,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5714,10 +5871,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5775,8 +5928,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5804,10 +5956,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6036,28 +6185,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -6296,25 +6430,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -6481,27 +6613,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - default: - BUG(); - break; } } @@ -6609,6 +6743,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + + +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, + * so the pipe reset status relies on the later gfx ring test result. + */ + return 0; +} + static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6618,8 +6815,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return -EINVAL; r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); - if (r) - return r; + if (r) { + + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v11_reset_gfx_pipe(ring); + if (r) + return r; + } r = gfx_v11_0_kgq_init_queue(ring, true); if (r) { @@ -6636,6 +6838,136 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v11_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + if (ring->me == 1) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME2_PIPE3_RESET, 0); + break; + default: + break; + } + /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v11_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe + * reset status relies on the compute ring test result. + */ + return 0; +} + static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -6646,8 +6978,10 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v11_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v11_0_kcq_init_queue(ring, true); @@ -6693,9 +7027,14 @@ static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printe for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_11[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_11[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -6755,9 +7094,16 @@ static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) /* ME0 is for GFX so start from 1 for CP */ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_11[reg])); + if (i && + gc_cp_reg_list_11[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_11[reg])); } index += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 62a257a4a3e9..f09d96bfee16 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -44,6 +44,8 @@ #include "gfx_v12_0.h" #include "nbif_v6_3_1.h" #include "mes_v12_0.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -133,11 +135,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), @@ -186,7 +191,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), - SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { @@ -215,7 +229,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), }; static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { @@ -475,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; - - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -528,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -881,6 +895,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev, soc24_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; +} + +static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info, + bool skip_check) +{ + if (adev->gfx.cp_gfx_shadow || skip_check) { + gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info); + return 0; + } + + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -EINVAL; +} + static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter, .select_se_sh = &gfx_v12_0_select_se_sh, @@ -889,6 +931,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = { .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk, + .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info, }; static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev) @@ -1346,6 +1389,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) unsigned num_compute_rings; int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); @@ -1354,7 +1398,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; - adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.me.num_queue_per_pipe = 8; adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; @@ -1369,6 +1413,22 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (!adev->gfx.disable_uq && + adev->gfx.me_fw_version >= 2780 && + adev->gfx.pfp_fw_version >= 2840 && + adev->gfx.mec_fw_version >= 3050 && + adev->mes.fw_version[0] >= 123) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): @@ -1383,11 +1443,13 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) break; } - /* recalculate compute rings to use based on hardware configuration */ - num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * - adev->gfx.mec.num_queue_per_pipe) / 2; - adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, - num_compute_rings); + if (adev->gfx.num_compute_rings) { + /* recalculate compute rings to use based on hardware configuration */ + num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe) / 2; + adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, + num_compute_rings); + } /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, @@ -1433,37 +1495,41 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* set up the gfx ring */ - for (i = 0; i < adev->gfx.me.num_me; i++) { - for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) - continue; + if (adev->gfx.num_gfx_rings) { + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; - r = gfx_v12_0_gfx_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; - ring_id++; + r = gfx_v12_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } } } } - ring_id = 0; - /* set up the compute queues - allocate horizontally across pipes */ - for (i = 0; i < adev->gfx.mec.num_mec; ++i) { - for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { - for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, - 0, i, k, j)) - continue; + if (adev->gfx.num_compute_rings) { + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, + 0, i, k, j)) + continue; - r = gfx_v12_0_compute_ring_init(adev, ring_id, - i, k, j); - if (r) - return r; + r = gfx_v12_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; - ring_id++; + ring_id++; + } } } } @@ -2948,6 +3014,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); #endif + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -2968,6 +3036,14 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3091,6 +3167,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->tmz_queue) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; /* set the wb address whether it's enabled or not */ @@ -3142,6 +3220,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_active = prop->hqd_active; + /* set UQ fenceaddress */ + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); + return 0; } @@ -3600,6 +3682,49 @@ static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) return r; } +static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int m, p, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { + for (m = 0; m < adev->gfx.me.num_me; m++) { + for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { + for (m = 0; m < adev->gfx.mec.num_mec; ++m) { + for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + (m * adev->gfx.mec.num_pipe_per_mec) + + p; + if (enable) + r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, + irq_type); + if (r) + return r; + } + } + } + + return 0; +} + static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3610,6 +3735,7 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + gfx_v12_0_set_userq_eop_interrupts(adev, false); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -3698,11 +3824,33 @@ static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = true; + break; + case 1: + adev->gfx.disable_kq = false; + adev->gfx.disable_uq = false; + break; + case 2: + adev->gfx.disable_kq = true; + adev->gfx.disable_uq = false; + break; + } + adev->gfx.funcs = &gfx_v12_0_gfx_funcs; - adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), - AMDGPU_MAX_COMPUTE_RINGS); + if (adev->gfx.disable_kq) { + adev->gfx.num_gfx_rings = 0; + adev->gfx.num_compute_rings = 0; + } else { + adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + } gfx_v12_0_set_kiq_pm4_funcs(adev); gfx_v12_0_set_ring_funcs(adev); @@ -3733,6 +3881,10 @@ static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block) if (r) return r; + r = gfx_v12_0_set_userq_eop_interrupts(adev, true); + if (r) + return r; + return 0; } @@ -4172,45 +4324,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4235,42 +4359,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4317,10 +4413,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4340,10 +4432,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4383,8 +4471,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4412,10 +4499,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -4749,25 +4833,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -4934,27 +5016,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); + if (!adev->gfx.disable_kq) { + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + break; } - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (ring->me == me_id && ring->pipe == pipe_id && - ring->queue == queue_id) - drm_sched_fault(&ring->sched); - } - break; - default: - BUG(); - break; } } @@ -5160,6 +5244,69 @@ static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) amdgpu_gfx_off_ctrl(adev, true); } +static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev) +{ + /* Disable the pipe reset until the CPFW fully support it.*/ + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); + return false; +} + +static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE0_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 1); + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + PFP_PIPE1_RESET, 0); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, + ME_PIPE1_RESET, 0); + break; + default: + break; + } + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); + + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - + RS64_FW_UC_START_ADDR_LO; + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Sometimes the ME start pc counter can't cache correctly, so the + * PC check only as a reference and pipe reset result rely on the + * later ring test. + */ + return 0; +} + static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5170,8 +5317,10 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { - dev_err(adev->dev, "reset via MES failed %d\n", r); - return r; + dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); + r = gfx_v12_reset_gfx_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kgq_init_queue(ring, true); @@ -5189,6 +5338,89 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reset_pipe = 0, clean_pipe = 0; + int r = 0; + + if (!gfx_v12_pipe_reset_support(adev)) + return -EOPNOTSUPP; + + gfx_v12_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); + clean_pipe = reset_pipe; + + if (adev->gfx.rs64_enable) { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE1_RESET, 0); + break; + case 2: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE2_RESET, 0); + break; + case 3: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, + MEC_PIPE3_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - + RS64_FW_UC_START_ADDR_LO; + } else { + switch (ring->pipe) { + case 0: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE0_RESET, 0); + break; + case 1: + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 1); + clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, + MEC_ME1_PIPE1_RESET, 0); + break; + default: + break; + } + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); + /* Doesn't find the F32 MEC instruction pointer register, and suppose + * the driver won't run into the F32 mode. + */ + } + + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v12_0_unset_safe_mode(adev, 0); + + dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name, + r == 0 ? "successfully" : "failed"); + /* Need the ring test to verify the pipe reset result.*/ + return 0; +} + static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; @@ -5199,8 +5431,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "reset via MMIO failed %d\n", r); - return r; + dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); + r = gfx_v12_0_reset_compute_pipe(ring); + if (r) + return r; } r = gfx_v12_0_kcq_init_queue(ring, true); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 13fbee46417a..70d7a1f434c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -53,6 +53,9 @@ #define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 #define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 +#define GFX6_NUM_GFX_RINGS 1 +#define GFX6_NUM_COMPUTE_RINGS 2 + static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); @@ -1732,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_get_cu_info(adev); gfx_v6_0_config_init(adev); - WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | - (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); - WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + WREG32(mmCP_QUEUE_THRESHOLDS, + ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | + (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); + + /* set HW defaults for 3D engine */ + WREG32(mmCP_MEQ_THRESHOLDS, + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); sx_debug_1 = RREG32(mmSX_DEBUG_1); WREG32(mmSX_DEBUG_1, sx_debug_1); @@ -2851,44 +2858,21 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v6_0_init_pg(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8181bd0e4f18..da0534ff1271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -55,6 +55,9 @@ #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 +#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 +#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 + static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); @@ -3882,67 +3885,22 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - switch (adev->asic_type) { - case CHIP_BONAIRE: - buffer[count++] = cpu_to_le32(0x16000012); - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KAVERI: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_KABINI: - case CHIP_MULLINS: - buffer[count++] = cpu_to_le32(0x00000000); /* XXX */ - buffer[count++] = cpu_to_le32(0x00000000); - break; - case CHIP_HAWAII: - buffer[count++] = cpu_to_le32(0x3a00161a); - buffer[count++] = cpu_to_le32(0x0000002e); - break; - default: - buffer[count++] = cpu_to_le32(0x00000000); - buffer[count++] = cpu_to_le32(0x00000000); - break; - } + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v7_0_init_pg(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index bfedd487efc5..5ee2237d8ee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1223,48 +1223,22 @@ out: static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); - buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - - PACKET3_SET_CONTEXT_REG_START); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + amdgpu_gfx_csb_preamble_end(buffer, count); } static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d7db4cb907ae..d377a7c57d5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), - SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3), + /* packet headers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP) }; static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { @@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP) }; enum ta_ras_gfx_subblock { @@ -1624,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) @@ -5441,16 +5442,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5473,16 +5466,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5672,19 +5657,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5770,28 +5745,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7339,9 +7299,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_9[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -7378,9 +7343,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) soc15_grbm_select(adev, 1 + i, j, k, 0, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_9[reg])); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); } index += reg_count; } @@ -7394,8 +7363,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + /* Emit the cleaner shader */ - amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + else + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d81449f9d822..c48cd47b531f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1547,7 +1547,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, { uint32_t bank, way, mem; static const char * const vml2_way_str[] = { "BIGK", "4K" }; - static const char * const utcl2_rounter_str[] = { "VMC", "APT" }; + static const char * const utcl2_router_str[] = { "VMC", "APT" }; mem = instance % blk->num_mem_blocks; way = (instance / blk->num_mem_blocks) % blk->num_ways; @@ -1568,7 +1568,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, dev_info( adev->dev, "GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n", - bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt); + bank, utcl2_router_str[mem], sec_cnt, ded_cnt); break; case ATC_L2_CACHE_2M: dev_info( diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 53fbf6ca7cdb..c233edf60569 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), @@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), }; struct amdgpu_gfx_ras gfx_v9_4_3_ras; @@ -1148,6 +1153,12 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; + case IP_VERSION(9, 5, 0): + if (adev->gfx.mec_fw_version >= 21) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; + } + break; default: break; } @@ -1262,6 +1273,22 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) } } +/* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1 + * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain + * bit in SET_RESOURCES + */ +static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id) +{ + uint32_t data; + + if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) + return; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1); + data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1); + WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data); +} + static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, int xcc_id) { @@ -1306,6 +1333,7 @@ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id); gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id); + gfx_v9_4_3_xcc_init_sq(adev, xcc_id); } static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) @@ -1318,6 +1346,20 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) adev->gfx.config.db_debug2 = RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + /* ToDo: GC 9.4.4 */ + case IP_VERSION(9, 4, 3): + if (adev->gfx.mec_fw_version >= 184) + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + break; + case IP_VERSION(9, 5, 0): + if (adev->gfx.mec_fw_version >= 23) + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + break; + default: + break; + } + for (i = 0; i < num_xcc; i++) gfx_v9_4_3_xcc_constants_init(adev, i); } @@ -3447,9 +3489,7 @@ static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me, static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev) { - /*TODO: Need check gfx9.4.4 mec fw whether supports pipe reset as well.*/ - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->gfx.mec_fw_version >= 0x0000009b) + if (!!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) return true; else dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n"); @@ -4558,12 +4598,21 @@ static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_pri "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", xcc_id, i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, - "%-50s \t 0x%08x\n", - gc_cp_reg_list_9_4_3[reg].reg_name, - adev->gfx.ip_dump_compute_queues - [xcc_offset + inst_offset + - reg]); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, + "%-50s \t 0x%08x\n", + "regCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + else + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); } inst_offset += reg_count; } @@ -4612,12 +4661,20 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) GET_INST(GC, xcc_id)); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues - [xcc_offset + - inst_offset + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST( - gc_cp_reg_list_9_4_3[reg], - GET_INST(GC, xcc_id))); + if (i && gc_cp_reg_list_9_4_3[reg].reg_offset == + regCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); } inst_offset += reg_count; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 809b3a882d0d..a3e2787501f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index fec9a007533a..72211409227b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -844,7 +840,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index c6f290704d47..b645d3e6a6c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else @@ -820,7 +816,7 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.first_kfd_vmid = 8; + adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a992e79d9581..8030fcd64210 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -249,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) /* disable VGA render */ tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp &= ~VGA_VSTATUS_CNTL; + tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK; WREG32(mmVGA_RENDER_CONTROL, tmp); } /* Update configuration */ @@ -627,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, "write" : "read", block, mc_client, mc_id); } -/* static const u32 mc_cg_registers[] = { - MC_HUB_MISC_HUB_CG, - MC_HUB_MISC_SIP_CG, - MC_HUB_MISC_VM_CG, - MC_XPB_CLK_GAT, - ATC_MISC_CG, - MC_CITF_MISC_WR_CG, - MC_CITF_MISC_RD_CG, - MC_CITF_MISC_VM_CG, - VM_L2_CG, + mmMC_HUB_MISC_HUB_CG, + mmMC_HUB_MISC_SIP_CG, + mmMC_HUB_MISC_VM_CG, + mmMC_XPB_CLK_GAT, + mmATC_MISC_CG, + mmMC_CITF_MISC_WR_CG, + mmMC_CITF_MISC_RD_CG, + mmMC_CITF_MISC_VM_CG, + mmVM_L2_CG, }; static const u32 mc_cg_ls_en[] = { @@ -672,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= mc_cg_ls_en[i]; else data &= ~mc_cg_ls_en[i]; @@ -689,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { orig = data = RREG32(mc_cg_registers[i]); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= mc_cg_en[i]; else data &= ~mc_cg_en[i]; @@ -705,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev, orig = data = RREG32_PCIE(ixPCIE_CNTL2); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1); data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1); @@ -728,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_HOST_PATH_CNTL); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0); else data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1); @@ -744,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, orig = data = RREG32(mmHDP_MEM_POWER_LS); - if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS)) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1); else data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0); @@ -752,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev, if (orig != data) WREG32(mmHDP_MEM_POWER_LS, data); } -*/ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type) { @@ -1098,6 +1096,20 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { + struct amdgpu_device *adev = ip_block->adev; + bool gate = false; + + if (state == AMD_CG_STATE_GATE) + gate = true; + + if (!(adev->flags & AMD_IS_APU)) { + gmc_v6_0_enable_mc_mgcg(adev, gate); + gmc_v6_0_enable_mc_ls(adev, gate); + } + gmc_v6_0_enable_bif_mgls(adev, gate); + gmc_v6_0_enable_hdp_mgcg(adev, gate); + gmc_v6_0_enable_hdp_ls(adev, gate); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 83e39f16044a..a8d5795084fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1157,17 +1157,10 @@ static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block) static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - u32 tmp; struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v7_0_is_idle(ip_block)) return 0; udelay(1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5effe8327d29..282197f4ffb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1213,10 +1213,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) - mtype = is_local ? MTYPE_CC : MTYPE_UC; - else - mtype = MTYPE_UC; + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1336,7 +1333,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, mtype_local = MTYPE_CC; *flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local); - } else if (adev->rev_id) { + } else { /* MTYPE_UC case */ *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); } @@ -1505,7 +1502,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; - adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; @@ -2075,6 +2071,9 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; } static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) @@ -2411,13 +2410,6 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->gmc.flush_tlb_needs_extra_type_2 = amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) && adev->gmc.xgmi.num_physical_nodes; - /* - * TODO: This workaround is badly documented and had a buggy - * implementation. We should probably verify what we do here. - */ - adev->gmc.flush_tlb_needs_extra_type_0 = - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && - adev->rev_id == 0; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index cbbeadeb53f7..e6c0d86d3486 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -36,22 +36,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - /* We just need to read back a register to post the write. - * Reading back the remapped register causes problems on - * some platforms so just read back the memory size register. - */ - if (adev->nbio.funcs->get_memsize) - adev->nbio.funcs->get_memsize(adev); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -185,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = { }; const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { - .flush_hdp = hdp_v4_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v4_0_invalidate_hdp, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 086a647308df..8bc001dc9f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -27,22 +27,6 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include -static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - /* We just need to read back a register to post the write. - * Reading back the remapped register causes problems on - * some platforms so just read back the memory size register. - */ - if (adev->nbio.funcs->get_memsize) - adev->nbio.funcs->get_memsize(adev); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -222,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev) } const struct amdgpu_hdp_funcs hdp_v5_0_funcs = { - .flush_hdp = hdp_v5_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .invalidate_hdp = hdp_v5_0_invalidate_hdp, .update_clock_gating = hdp_v5_0_update_clock_gating, .get_clock_gating_state = hdp_v5_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index 6ccd31c8bc69..ec20daf4272c 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -30,22 +30,6 @@ #define regHDP_CLK_CNTL_V6_1 0xd5 #define regHDP_CLK_CNTL_V6_1_BASE_IDX 0 -static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - /* We just need to read back a register to post the write. - * Reading back the remapped register causes problems on - * some platforms so just read back the memory size register. - */ - if (adev->nbio.funcs->get_memsize) - adev->nbio.funcs->get_memsize(adev); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -154,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v6_0_funcs = { - .flush_hdp = hdp_v6_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v6_0_update_clock_gating, .get_clock_gating_state = hdp_v6_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 2c9239a22f39..ed1debc03507 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -27,22 +27,6 @@ #include "hdp/hdp_7_0_0_sh_mask.h" #include -static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - /* We just need to read back a register to post the write. - * Reading back the remapped register causes problems on - * some platforms so just read back the memory size register. - */ - if (adev->nbio.funcs->get_memsize) - adev->nbio.funcs->get_memsize(adev); - } else { - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - } -} - static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -142,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, } const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { - .flush_hdp = hdp_v7_0_flush_hdp, + .flush_hdp = amdgpu_hdp_generic_flush, .update_clock_gating = hdp_v7_0_update_clock_gating, .get_clock_gating_state = hdp_v7_0_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index eb4185dcbd1d..5900b560b7de 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } /* update doorbell range for ih ring 0 */ @@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). Hopefully diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 218e16b68f1d..cb94bd71300f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -28,11 +28,13 @@ #include "soc15d.h" #include "jpeg_v4_0_3.h" #include "jpeg_v5_0_1.h" +#include "mmsch_v5_0.h" #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev); static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, @@ -156,21 +158,16 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - ring->use_doorbell = false; + ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); if (!amdgpu_sriov_vf(adev)) { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 11 * jpeg_inst; } else { - if (j < 4) - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 4 + j + 32 * jpeg_inst; - else - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 8 + j + 32 * jpeg_inst; + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 2 + j + 32 * jpeg_inst; } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, @@ -237,7 +234,10 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) int i, j, r, jpeg_inst; if (amdgpu_sriov_vf(adev)) { - /* jpeg_v5_0_1_start_sriov(adev); */ + r = jpeg_v5_0_1_start_sriov(adev); + if (r) + return r; + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; @@ -264,7 +264,7 @@ static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, - (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->pipe, ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | VCN_JPEG_DB_CTRL__EN_MASK); @@ -291,8 +291,10 @@ static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) cancel_delayed_work_sync(&adev->jpeg.idle_work); - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + } return ret; } @@ -422,6 +424,119 @@ static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring) reg_offset); } +static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j, jpeg_inst; + + struct mmsch_v5_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v5_0_cmd_end end = { {0} }; + struct mmsch_v5_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + jpeg_inst = GET_INST(JPEG, i); + + memset(&header, 0, sizeof(struct mmsch_v5_0_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE); + MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j < 5) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j - 5].table_offset = item_offset; + header.mjpegdec1[j - 5].init_status = 0; + header.mjpegdec1[j - 5].table_size = table_size; + } + header.total_size += table_size; + item_offset += table_size; + } + + MMSCH_V5_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v5_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + init_status = + ((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status; + while (resp != expected) { + resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP); + + if (resp != 0) + break; + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && + init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", + resp, init_status); + + } + return 0; +} + /** * jpeg_v5_0_1_start - start JPEG block * @@ -581,6 +696,11 @@ static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; int ret; + if (amdgpu_sriov_vf(adev)) { + adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->jpeg.cur_state) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c new file mode 100644 index 000000000000..d6f50b13e2ba --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" + +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE + +static int +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + DRM_ERROR("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_bo_unreserve(bo); + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + return ret; +} + +static int +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + uint64_t wptr) +{ + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + int ret; + + wptr_vm = queue->vm; + ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (ret) + return ret; + + wptr &= AMDGPU_GMC_HOLE_MASK; + wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + DRM_ERROR("Failed to lookup wptr bo\n"); + return -EINVAL; + } + + wptr_obj->obj = wptr_mapping->bo_va->base.bo; + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); + return -EINVAL; + } + + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); + if (ret) { + DRM_ERROR("Failed to map wptr bo to GART\n"); + return ret; + } + + queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj); + return 0; +} + +static int convert_to_mes_priority(int priority) +{ + switch (priority) { + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW: + default: + return AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW: + return AMDGPU_MES_PRIORITY_LEVEL_LOW; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH: + return AMDGPU_MES_PRIORITY_LEVEL_HIGH; + } +} + +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + struct amdgpu_mqd_prop *userq_props = queue->userq_prop; + struct mes_add_queue_input queue_input; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); + + queue_input.process_va_start = 0; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; + + /* set process quantum to 10 ms and gang quantum to 1 ms as default */ + queue_input.process_quantum = 100000; + queue_input.gang_quantum = 10000; + queue_input.paging = false; + + queue_input.process_context_addr = ctx->gpu_addr; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL; + queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority); + + queue_input.process_id = queue->vm->pasid; + queue_input.queue_type = queue->queue_type; + queue_input.mqd_addr = queue->mqd.gpu_addr; + queue_input.wptr_addr = userq_props->wptr_gpu_addr; + queue_input.queue_size = userq_props->queue_size >> 2; + queue_input.doorbell_offset = userq_props->doorbell_index; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo); + queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to map queue in HW, err (%d)\n", r); + return r; + } + + DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); + return 0; +} + +static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_remove_queue_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); + queue_input.doorbell_offset = queue->doorbell_index; + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + return r; +} + +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) +{ + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r, size; + + /* + * The FW expects at least one page space allocated for + * process ctx and gang ctx each. Create an object + * for the same. + */ + size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; + r = amdgpu_userq_create_object(uq_mgr, ctx, size); + if (r) { + DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); + return r; + } + + return 0; +} + +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; + struct drm_amdgpu_userq_in *mqd_user = args_in; + struct amdgpu_mqd_prop *userq_props; + int r; + + /* Structure to initialize MQD for userqueue using generic MQD init function */ + userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL); + if (!userq_props) { + DRM_ERROR("Failed to allocate memory for userq_props\n"); + return -ENOMEM; + } + + if (!mqd_user->wptr_va || !mqd_user->rptr_va || + !mqd_user->queue_va || mqd_user->queue_size == 0) { + DRM_ERROR("Invalid MQD parameters for userqueue\n"); + r = -EINVAL; + goto free_props; + } + + r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); + if (r) { + DRM_ERROR("Failed to create MQD object for userqueue\n"); + goto free_props; + } + + /* Initialize the MQD BO with user given values */ + userq_props->wptr_gpu_addr = mqd_user->wptr_va; + userq_props->rptr_gpu_addr = mqd_user->rptr_va; + userq_props->queue_size = mqd_user->queue_size; + userq_props->hqd_base_gpu_addr = mqd_user->queue_va; + userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; + userq_props->use_doorbell = true; + userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; + + if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { + struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; + + if (mqd_user->mqd_size != sizeof(*compute_mqd)) { + DRM_ERROR("Invalid compute IP MQD size\n"); + r = -EINVAL; + goto free_mqd; + } + + compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(compute_mqd)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->eop_gpu_addr = compute_mqd->eop_va; + userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; + userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; + userq_props->hqd_active = false; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + userq_props->tmz_queue = + mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + r = -EINVAL; + goto free_mqd; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + r = -ENOMEM; + goto free_mqd; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); + } + + queue->userq_prop = userq_props; + + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); + if (r) { + DRM_ERROR("Failed to initialize MQD for userqueue\n"); + goto free_mqd; + } + + /* Create BO for FW operations */ + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); + if (r) { + DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); + goto free_mqd; + } + + /* FW expects WPTR BOs to be mapped into GART */ + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + if (r) { + DRM_ERROR("Failed to create WPTR mapping\n"); + goto free_ctx; + } + + return 0; + +free_ctx: + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + +free_mqd: + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + +free_props: + kfree(userq_props); + + return r; +} + +static void +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + kfree(queue->userq_prop); + amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); +} + +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .unmap = mes_userq_unmap, + .map = mes_userq_map, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ofa/ad102.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h similarity index 71% rename from drivers/gpu/drm/nouveau/nvkm/engine/ofa/ad102.c rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.h index 7ac87ef26aec..090ae8897770 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ofa/ad102.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Copyright 2023 Red Hat Inc. + * Copyright 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,27 +19,12 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. + * */ -#include "priv.h" -#include +#ifndef MES_USERQ_H +#define MES_USERQ_H +#include "amdgpu_userq.h" -#include - -static const struct nvkm_engine_func -ad102_ofa = { - .sclass = { - { -1, -1, NVC9FA_VIDEO_OFA }, - {} - } -}; - -int -ad102_ofa_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, - struct nvkm_engine **pengine) -{ - if (nvkm_gsp_rm(device->gsp)) - return r535_ofa_new(&ad102_ofa, device, type, inst, pengine); - - return -ENODEV; -} +extern const struct amdgpu_userq_funcs userq_mes_funcs; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ef9538fbbf53..c9eba537de09 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -287,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -310,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -458,31 +475,6 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - if (input->use_mmio) - return mes_v11_0_reset_queue_mmio(mes, input->queue_type, - input->me_id, input->pipe_id, - input->queue_id, input->vmid); - - union MESAPI__RESET mes_reset_queue_pkt; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -649,7 +641,7 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); return -EINVAL; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; @@ -694,7 +686,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->compute_hqd_mask[i]; for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; @@ -723,7 +716,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -753,8 +746,8 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; @@ -772,7 +765,7 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -798,7 +791,6 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, - .reset_legacy_queue = mes_v11_0_reset_legacy_queue, .reset_hw_queue = mes_v11_0_reset_hw_queue, }; @@ -1701,22 +1693,10 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .name = "mes_v11_0", .early_init = mes_v11_0_early_init, - .late_init = mes_v11_0_late_init, + .late_init = NULL, .sw_init = mes_v11_0_sw_init, .sw_fini = mes_v11_0_sw_fini, .hw_init = mes_v11_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e6ab617b9a40..b4f17332d466 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -274,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -297,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -477,32 +494,6 @@ static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ return r; } -static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, - struct mes_reset_queue_input *input) -{ - union MESAPI__RESET mes_reset_queue_pkt; - int pipe; - - memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); - - mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; - mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; - mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - - mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; - mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; - /*mes_reset_queue_pkt.reset_queue_only = 1;*/ - - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, - &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), - offsetof(union MESAPI__REMOVE_QUEUE, api_status)); -} - static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -762,7 +753,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } - if (enforce_isolation) + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) mes_set_hw_res_pkt.limit_single_process = 1; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, @@ -845,8 +836,8 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } -static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, - struct mes_reset_legacy_queue_input *input) +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) { union MESAPI__RESET mes_reset_queue_pkt; int pipe; @@ -865,7 +856,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); - if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + if (input->legacy_gfx) { mes_reset_queue_pkt.reset_legacy_gfx = 1; mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; mes_reset_queue_pkt.queue_id_lp = input->queue_id; @@ -878,7 +869,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; } - if (mes->adev->enable_uni_mes) + if (input->is_kq) pipe = AMDGPU_MES_KIQ_PIPE; else pipe = AMDGPU_MES_SCHED_PIPE; @@ -896,7 +887,6 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, - .reset_legacy_queue = mes_v12_0_reset_legacy_queue, .reset_hw_queue = mes_v12_0_reset_hw_queue, }; @@ -1811,21 +1801,10 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static int mes_v12_0_late_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v12_0_ip_funcs = { .name = "mes_v12_0", .early_init = mes_v12_0_early_init, - .late_init = mes_v12_0_late_init, + .late_init = NULL, .sw_init = mes_v12_0_sw_init, .sw_fini = mes_v12_0_sw_fini, .hw_init = mes_v12_0_hw_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 84cde1239ee4..76167fadb292 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -30,6 +30,7 @@ #include "soc15_common.h" #include "soc15.h" #include "amdgpu_ras.h" +#include "amdgpu_psp.h" #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 @@ -192,10 +193,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) uint32_t tmp, inst_mask; int i; - /* Setup TLB control */ - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) { - tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -209,7 +208,26 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } } } @@ -221,6 +239,9 @@ static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { for (j = 0; j < 5; j++) { /* DAGB instances */ @@ -454,6 +475,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) return 0; } +static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i, inst_mask; + + if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) { + tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL); + } else { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } + } +} + static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; @@ -467,15 +512,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) for (i = 0; i < 16; i++) WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, - 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL); @@ -485,6 +521,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0); } } + + mmhub_v1_8_disable_l1_tlb(adev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h new file mode 100644 index 000000000000..6f749814929f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h @@ -0,0 +1,144 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V5_0_H__ +#define __MMSCH_V5_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 5 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +#define RB_ENABLED (1 << 0) +#define RB4_ENABLED (1 << 1) + +#define MMSCH_VF_ENGINE_STATUS__PASS 0x1 + +#define MMSCH_VF_MAILBOX_RESP__OK 0x1 +#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3 +#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4 +#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5 + +enum mmsch_v5_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v5_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v5_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v5_0_table_info vcn0; + struct mmsch_v5_0_table_info mjpegdec0[5]; + struct mmsch_v5_0_table_info mjpegdec1[5]; +}; + +struct mmsch_v5_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v5_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v5_0_cmd_direct_write { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v5_0_cmd_direct_read_modify_write { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v5_0_cmd_direct_polling { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v5_0_cmd_end { + struct mmsch_v5_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v5_0_cmd_indirect_write { + struct mmsch_v5_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \ + size_dw = size / 4; \ + direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V5_0_INSERT_END() { \ + size = sizeof(struct mmsch_v5_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index f5411b798e11..48101a34e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -274,6 +274,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -281,8 +282,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -293,6 +292,19 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -312,26 +324,42 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_ai_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { - case IDH_FLR_NOTIFICATION: + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_ai_mailbox_send_ack(adev); + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->virt.flr_work), - "Failed to queue work! at %s", - __func__); + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); break; - case IDH_QUERY_ALIVE: - xgpu_ai_mailbox_send_ack(adev); - break; - /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore - * it byfar since that polling thread will handle it, - * other msg like flr complete is not handled here. - */ - case IDH_CLR_MSG_BUF: - case IDH_FLR_NOTIFICATION_CMPL: - case IDH_READY_TO_ACCESS_GPU: - default: + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); + break; + case IDH_QUERY_ALIVE: + xgpu_ai_mailbox_send_ack(adev); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it byfar since that polling thread will handle it, + * other msg like flr complete is not handled here. + */ + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: break; } @@ -387,6 +415,7 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index ed57cbc150af..874b9f8f9804 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -40,6 +40,7 @@ enum idh_request { IDH_LOG_VF_ERROR = 200, IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -54,6 +55,9 @@ enum idh_event { IDH_RAS_POISON_READY, IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 5aadf24cb202..f6d8597452ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -67,6 +67,8 @@ static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); if (reg == IDH_FAIL) r = -EINVAL; + if (reg == IDH_UNRECOV_ERR_NOTIFICATION) + r = -ENODEV; else if (reg != event) return -ENOENT; @@ -103,6 +105,7 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) { int r; uint64_t timeout, now; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); now = (uint64_t)ktime_to_ms(ktime_get()); timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT; @@ -110,8 +113,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) do { r = xgpu_nv_mailbox_rcv_msg(adev, event); if (!r) { - dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); + dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", + event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now); return 0; + } else if (r == -ENODEV) { + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. " + "Runtime Services are halted.\n"); + } + return r; } msleep(10); @@ -166,6 +177,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, enum idh_event event = -1; send_request: + + if (amdgpu_ras_is_rma(adev)) + return -ENODEV; + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { @@ -187,6 +202,9 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; + case IDH_REQ_RAS_BAD_PAGES: + event = IDH_RAS_BAD_PAGES_READY; + break; default: break; } @@ -320,6 +338,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + struct amdgpu_reset_context reset_context = { 0 }; amdgpu_virt_fini_data_exchange(adev); @@ -330,8 +349,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) { - struct amdgpu_reset_context reset_context; - memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -342,6 +359,19 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } } +static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); + amdgpu_virt_request_bad_pages(adev); + amdgpu_virt_init_data_exchange(adev); + up_read(&adev->reset_domain->sem); + } +} + static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -364,8 +394,27 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { enum idh_event event = xgpu_nv_mailbox_peek_msg(adev); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.bad_pages_work); + break; + case IDH_UNRECOV_ERR_NOTIFICATION: + xgpu_nv_mailbox_send_ack(adev); + if (!amdgpu_ras_is_rma(adev)) { + ras->is_rma = true; + dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n"); + } + + if (amdgpu_sriov_runtime(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); + break; case IDH_FLR_NOTIFICATION: if (amdgpu_sriov_runtime(adev)) WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, @@ -436,6 +485,7 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); + INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work); return 0; } @@ -480,6 +530,11 @@ static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr) adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0); } +static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -492,4 +547,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, + .req_bad_pages = xgpu_nv_req_ras_bad_pages, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 72c9fceb9d79..5808689562cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -42,6 +42,7 @@ enum idh_request { IDH_RAS_POISON = 202, IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, + IDH_REQ_RAS_BAD_PAGES = 205, }; enum idh_event { @@ -58,6 +59,9 @@ enum idh_event { IDH_RAS_ERROR_DETECTED, IDH_RAS_ERROR_COUNT_READY = 11, IDH_RAS_CPER_DUMP_READY = 14, + IDH_RAS_BAD_PAGES_READY = 15, + IDH_RAS_BAD_PAGES_NOTIFICATION = 16, + IDH_UNRECOV_ERR_NOTIFICATION = 17, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index f23cb79110d6..a376f072700d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -177,8 +177,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do { u32 doorbell_range = 0, doorbell_ctrl = 0; u32 aid_id = instance; + u32 range_size; if (use_doorbell) { + range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)) ? + 0xb : 0x9; doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_OFFSET_ENTRY, @@ -186,7 +190,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x9); + range_size); if (aid_id) doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -204,7 +208,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); + S2A_DOORBELL_PORT1_RANGE_SIZE, range_size); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index fcd708eae75c..80153f837470 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -34,9 +34,6 @@ #include "sdma0/sdma0_4_0_offset.h" #include "nbio/nbio_7_4_offset.h" -#include "oss/osssys_4_0_offset.h" -#include "oss/osssys_4_0_sh_mask.h" - MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); @@ -99,9 +96,6 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 0x80000000, 0x80000000, false); @@ -138,8 +132,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); @@ -147,37 +139,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static void psp_v12_0_reroute_ih(struct psp_context *psp) -{ - struct amdgpu_device *adev = psp->adev; - uint32_t tmp; - - /* Change IH ring for VMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - - /* Change IH ring for UMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); -} - static int psp_v12_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -186,49 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - psp_v12_0_reroute_ih(psp); + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - if (amdgpu_sriov_vf(psp->adev)) { - /* Write low address of the ring to C2PMSG_102 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); - /* Write high address of the ring to C2PMSG_103 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); - - /* Write the ring initialization command to C2PMSG_101 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, - GFX_CTRL_CMD_ID_INIT_GPCOM_RING); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); - - } else { - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - } + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); return ret; } @@ -247,9 +182,6 @@ static int psp_v12_0_ring_stop(struct psp_context *psp, WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index afdf8ce3b4c5..df612fd9cc50 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -71,20 +71,13 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* Retry times for vmbx ready wait */ #define PSP_VMBX_POLLING_LIMIT 3000 -/* VBIOS gfl defines */ -#define MBOX_READY_MASK 0x80000000 -#define MBOX_STATUS_MASK 0x0000FFFF -#define MBOX_COMMAND_MASK 0x00FF0000 -#define MBOX_READY_FLAG 0x80000000 -#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 -#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 -#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 - /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 #define regMP1_PUB_SCRATCH0 0x3b10090 +#define PSP13_BL_STATUS_SIZE 100 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -151,6 +144,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp) return sol_reg != 0x0; } +static void psp_v13_0_bootloader_print_status(struct psp_context *psp, + const char *msg) +{ + struct amdgpu_device *adev = psp->adev; + u32 bl_status_reg; + char bl_status_msg[PSP13_BL_STATUS_SIZE]; + int i, at; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { + at = 0; + for_each_inst(i, adev->aid_mask) { + bl_status_reg = + (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92) + << 2) + + adev->asic_funcs->encode_ext_smn_addressing(i); + at += snprintf(bl_status_msg + at, + PSP13_BL_STATUS_SIZE - at, + " status(%02i): 0x%08x", i, + RREG32_PCIE_EXT(bl_status_reg)); + } + dev_info(adev->dev, "%s - %s", msg, bl_status_msg); + } +} + static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -196,6 +215,9 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) if (ret == 0) return 0; + if (retry_loop && !(retry_loop % 10)) + psp_v13_0_bootloader_print_status( + psp, "Waiting for bootloader completion"); } return ret; @@ -710,7 +732,8 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd) /* Ring the doorbell */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1); - if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) + if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE || + cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else @@ -766,6 +789,37 @@ static int psp_v13_0_update_spirom(struct psp_context *psp, return 0; } +static int psp_v13_0_dump_spirom(struct psp_context *psp, + uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + /* Confirm PSP is ready to start */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); + return ret; + } + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr)); + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO); + if (ret) + return ret; + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr)); + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI); + if (ret) + return ret; + + ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE); + + return ret; +} + static int psp_v13_0_vbflash_status(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -858,6 +912,25 @@ static bool psp_v13_0_is_reload_needed(struct psp_context *psp) return false; } +static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, + enum psp_reg_prog_id id) +{ + struct amdgpu_device *adev = psp->adev; + int ret = -EOPNOTSUPP; + + /* PSP will broadcast the value to all instances */ + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -879,11 +952,13 @@ static const struct psp_funcs psp_v13_0_funcs = { .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw, .update_spirom = psp_v13_0_update_spirom, + .dump_spirom = psp_v13_0_dump_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, .is_reload_needed = psp_v13_0_is_reload_needed, + .reg_program_no_ring = psp_v13_0_reg_program_no_ring, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 688a720bbbbd..9c169112a5e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -106,8 +106,9 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -1333,6 +1334,11 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } +static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v4_4_2_stop_queue, + .start_kernel_queue = &sdma_v4_4_2_restore_queue, +}; + static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1351,8 +1357,6 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); - sdma_v4_4_2_set_engine_reset_funcs(adev); - return 0; } @@ -1447,6 +1451,7 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) /* Initialize guilty flags for GFX and PAGE queues */ adev->sdma.instance[i].gfx_guilty = false; adev->sdma.instance[i].page_guilty = false; + adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1678,11 +1683,12 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) return r; } -static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + u32 instance_id = GET_INST(SDMA0, ring->me); u32 inst_mask; uint64_t rptr; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -1715,11 +1721,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return 0; } -static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { - int i; + struct amdgpu_device *adev = ring->adev; u32 inst_mask; - struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + int i; inst_mask = 1 << ring->me; udelay(50); @@ -1739,16 +1745,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instan return sdma_v4_4_2_inst_start(adev, inst_mask, true); } -static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { - .pre_reset = sdma_v4_4_2_stop_queue, - .post_reset = sdma_v4_4_2_restore_queue, -}; - -static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) -{ - amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs); -} - static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -2373,7 +2369,9 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(9, 5, 0): - /*TODO: enable the queue reset flag until fw supported */ + if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e2..9505ae96fbec 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -112,6 +112,8 @@ static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring); static const struct soc15_reg_golden golden_settings_sdma_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), @@ -369,67 +371,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +546,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -588,15 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_0_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers (NAVI10). */ -static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -688,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_0_gfx_stop(adev); + sdma_v5_0_gfx_stop(adev, 1 << inst_mask); sdma_v5_0_rlc_stop(adev); } @@ -1046,33 +1017,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1045,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1057,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1124,38 +1080,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); - - r = amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1125,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1136,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1389,6 +1327,36 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) +{ + u32 grbm_soft_reset; + u32 tmp; + + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} + +static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_0_stop_queue, + .start_kernel_queue = &sdma_v5_0_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine, +}; + static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1431,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1572,32 +1541,25 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; + + return amdgpu_sdma_reset_engine(adev, inst_id); +} + +static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } - + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_0_gfx_stop(adev, 1 << i); /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1628,34 +1590,29 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); - - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - /* unfreeze*/ - freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); - freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); - - r = sdma_v5_0_gfx_resume_instance(adev, i, true); - err0: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } +static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + /* unfreeze*/ + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); + + r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c1..a6e612b4a892 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -113,6 +113,8 @@ static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring); +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring); static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -394,11 +396,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -407,15 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se * sdma_v5_2_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer - * + * @inst_mask: mask of dma engine instances to be disabled * Stop the gfx async dma ring buffers. */ -static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) +static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { u32 rb_cntl, ib_cntl; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -506,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; + uint32_t inst_mask; + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!enable) { - sdma_v5_2_gfx_stop(adev); + sdma_v5_2_gfx_stop(adev, inst_mask); sdma_v5_2_rlc_stop(adev); } @@ -761,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) return 0; } +static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id) +{ + u32 grbm_soft_reset; + u32 tmp; + + grbm_soft_reset = REG_SET_FIELD(0, + GRBM_SOFT_RESET, SOFT_RESET_SDMA0, + 1); + grbm_soft_reset <<= instance_id; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + return 0; +} + static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 grbm_soft_reset; - u32 tmp; int i; for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; - - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - + sdma_v5_2_soft_reset_engine(adev, i); udelay(50); } return 0; } +static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = { + .stop_kernel_queue = &sdma_v5_2_stop_queue, + .start_kernel_queue = &sdma_v5_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine, +}; + /** * sdma_v5_2_start - setup and start the async dma engines * @@ -903,33 +917,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +945,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +957,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +980,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); - - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1024,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1035,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1337,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1472,32 +1454,25 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, j, r; - u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + u32 inst_id = ring->me; + + return amdgpu_sdma_reset_engine(adev, inst_id); +} + +static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) +{ + u32 f32_cntl, freeze, cntl, stat1_reg; + struct amdgpu_device *adev = ring->adev; + int i, j, r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } - + i = ring->me; amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* stop queue */ - ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - - rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + sdma_v5_2_gfx_stop(adev, 1 << i); /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); @@ -1530,35 +1505,30 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); - /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ - preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); - preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); - - soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; - - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - udelay(50); - - soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); - - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); - - /* unfreeze and unhalt */ - freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); - freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); - - r = sdma_v5_2_gfx_resume_instance(adev, i, true); - err0: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return r; } +static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 inst_id = ring->me; + u32 freeze; + int r; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + /* unfreeze and unhalt */ + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze); + + r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c214c3d2149b..da5b5d64f137 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,6 +43,7 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -376,11 +377,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -891,6 +890,9 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } @@ -917,33 +919,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -956,10 +947,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -971,8 +959,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -995,37 +982,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); - - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1053,10 +1026,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1067,8 +1037,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1300,6 +1269,23 @@ static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) return r; @@ -1334,6 +1320,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1376,6 +1363,10 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + /* add firmware version checks here */ + if (0 && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; @@ -1399,11 +1390,39 @@ static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v6_0_start(adev); + r = sdma_v6_0_start(adev); + if (r) + return r; + + return sdma_v6_0_set_userq_trap_interrupts(adev, true); } static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1415,6 +1434,7 @@ static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); + sdma_v6_0_set_userq_trap_interrupts(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b2706221df99..befe013b11a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -42,6 +42,7 @@ #include "sdma_common.h" #include "sdma_v7_0.h" #include "v12_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -204,66 +205,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -407,11 +381,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -935,6 +907,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } @@ -961,33 +936,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1000,10 +964,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1015,8 +976,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1039,37 +999,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); - - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1097,10 +1043,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1111,8 +1054,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1312,6 +1254,23 @@ static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; + switch (amdgpu_user_queue) { + case -1: + case 0: + default: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = true; + break; + case 1: + adev->sdma.no_user_submission = false; + adev->sdma.disable_uq = false; + break; + case 2: + adev->sdma.no_user_submission = true; + adev->sdma.disable_uq = false; + break; + } + r = amdgpu_sdma_init_microcode(adev, 0, true); if (r) { DRM_ERROR("Failed to init sdma firmware!\n"); @@ -1347,6 +1306,7 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) ring->ring_obj = NULL; ring->use_doorbell = true; ring->me = i; + ring->no_user_submission = adev->sdma.no_user_submission; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); @@ -1378,6 +1338,10 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + /* add firmware version checks here */ + if (0 && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + return r; } @@ -1400,11 +1364,39 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev, + bool enable) +{ + unsigned int irq_type; + int i, r; + + if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) { + for (i = 0; i < adev->sdma.num_instances; i++) { + irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i; + if (enable) + r = amdgpu_irq_get(adev, &adev->sdma.trap_irq, + irq_type); + else + r = amdgpu_irq_put(adev, &adev->sdma.trap_irq, + irq_type); + if (r) + return r; + } + } + + return 0; +} + static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int r; - return sdma_v7_0_start(adev); + r = sdma_v7_0_start(adev); + if (r) + return r; + + return sdma_v7_0_set_userq_trap_interrupts(adev, true); } static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) @@ -1416,6 +1408,7 @@ static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v7_0_ctx_switch_enable(adev, false); sdma_v7_0_enable(adev, false); + sdma_v7_0_set_userq_trap_interrupts(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2247f6a94858..e0f139de7991 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -35,6 +35,7 @@ #include "amdgpu_vce.h" #include "atom.h" #include "amd_pcie.h" + #include "si_dpm.h" #include "sid.h" #include "si_ih.h" @@ -44,17 +45,30 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "amdgpu_vkms.h" + +#include "uvd/uvd_4_0_d.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" -#include "uvd/uvd_4_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" +#include "si_enums.h" #include "amdgpu_dm.h" +#include "amdgpu_vkms.h" static const u32 tahiti_golden_registers[] = { @@ -1071,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - r = RREG32(SMC_IND_DATA_0); + WREG32(mmSMC_IND_INDEX_0, (reg)); + r = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return r; } @@ -1082,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, (reg)); - WREG32(SMC_IND_DATA_0, (v)); + WREG32(mmSMC_IND_INDEX_0, (reg)); + WREG32(mmSMC_IND_DATA_0, (v)); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } @@ -1110,20 +1124,20 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS}, + {mmGRBM_STATUS}, {mmGRBM_STATUS2}, {mmGRBM_STATUS_SE0}, {mmGRBM_STATUS_SE1}, {mmSRBM_STATUS}, {mmSRBM_STATUS2}, - {DMA_STATUS_REG + DMA0_REGISTER_OFFSET}, - {DMA_STATUS_REG + DMA1_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET}, + {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET}, {mmCP_STAT}, {mmCP_STALLED_STAT1}, {mmCP_STALLED_STAT2}, {mmCP_STALLED_STAT3}, - {GB_ADDR_CONFIG}, - {MC_ARB_RAMCFG}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, {mmGB_TILE_MODE0}, {mmGB_TILE_MODE1}, {mmGB_TILE_MODE2}, @@ -1156,7 +1170,7 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmGB_TILE_MODE29}, {mmGB_TILE_MODE30}, {mmGB_TILE_MODE31}, - {CC_RB_BACKEND_DISABLE, true}, + {mmCC_RB_BACKEND_DISABLE, true}, {mmGC_USER_RB_BACKEND_DISABLE, true}, {mmPA_SC_RASTER_CONFIG, true}, }; @@ -1264,37 +1278,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) u32 rom_cntl; bool r; - bus_cntl = RREG32(R600_BUS_CNTL); + bus_cntl = RREG32(mmBUS_CNTL); if (adev->mode_info.num_crtc) { - d1vga_control = RREG32(AVIVO_D1VGA_CONTROL); - d2vga_control = RREG32(AVIVO_D2VGA_CONTROL); - vga_render_control = RREG32(VGA_RENDER_CONTROL); + d1vga_control = RREG32(mmD1VGA_CONTROL); + d2vga_control = RREG32(mmD2VGA_CONTROL); + vga_render_control = RREG32(mmVGA_RENDER_CONTROL); } rom_cntl = RREG32(R600_ROM_CNTL); /* enable the rom */ - WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS)); + WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK)); if (adev->mode_info.num_crtc) { /* Disable VGA mode */ - WREG32(AVIVO_D1VGA_CONTROL, - (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(AVIVO_D2VGA_CONTROL, - (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE | - AVIVO_DVGA_CONTROL_TIMING_SELECT))); - WREG32(VGA_RENDER_CONTROL, - (vga_render_control & C_000300_VGA_VSTATUS_CNTL)); + WREG32(mmD1VGA_CONTROL, + (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmD2VGA_CONTROL, + (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK | + D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK))); + WREG32(mmVGA_RENDER_CONTROL, + (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK)); } WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE); r = amdgpu_read_bios(adev); /* restore regs */ - WREG32(R600_BUS_CNTL, bus_cntl); + WREG32(mmBUS_CNTL, bus_cntl); if (adev->mode_info.num_crtc) { - WREG32(AVIVO_D1VGA_CONTROL, d1vga_control); - WREG32(AVIVO_D2VGA_CONTROL, d2vga_control); - WREG32(VGA_RENDER_CONTROL, vga_render_control); + WREG32(mmD1VGA_CONTROL, d1vga_control); + WREG32(mmD2VGA_CONTROL, d2vga_control); + WREG32(mmVGA_RENDER_CONTROL, vga_render_control); } WREG32(R600_ROM_CNTL, rom_cntl); return r; @@ -1331,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev) { u32 tmp, i; - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_BYPASS_EN; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp |= SPLL_CTLREQ_CHG; - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); for (i = 0; i < adev->usec_timeout; i++) { - if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS) + if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK) break; udelay(1); } - tmp = RREG32(CG_SPLL_FUNC_CNTL_2); - tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE); - WREG32(CG_SPLL_FUNC_CNTL_2, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2); + tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK | + CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK); + WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp); tmp = RREG32(MPLL_CNTL_MODE); tmp &= ~MPLL_MCLK_SEL; @@ -1358,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev) { u32 tmp; - tmp = RREG32(SPLL_CNTL_MODE); - tmp |= SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_RESET; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(CG_SPLL_FUNC_CNTL); - tmp |= SPLL_SLEEP; - WREG32(CG_SPLL_FUNC_CNTL, tmp); + tmp = RREG32(mmCG_SPLL_FUNC_CNTL); + tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK; + WREG32(mmCG_SPLL_FUNC_CNTL, tmp); - tmp = RREG32(SPLL_CNTL_MODE); - tmp &= ~SPLL_SW_DIR_CONTROL; - WREG32(SPLL_CNTL_MODE, tmp); + tmp = RREG32(mmSPLL_CNTL_MODE); + tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK; + WREG32(mmSPLL_CNTL_MODE, tmp); } static int si_gpu_pci_config_reset(struct amdgpu_device *adev) @@ -1454,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; - temp = RREG32(CONFIG_CNTL); + temp = RREG32(mmCONFIG_CNTL); if (!state) { temp &= ~(1<<0); temp |= (1<<1); } else { temp &= ~(1<<1); } - WREG32(CONFIG_CNTL, temp); + WREG32(mmCONFIG_CNTL, temp); } static u32 si_get_xclk(struct amdgpu_device *adev) @@ -1469,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; - tmp = RREG32(CG_CLKPIN_CNTL_2); - if (tmp & MUX_TCLK_TO_XCLK) + tmp = RREG32(mmCG_CLKPIN_CNTL_2); + if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK) return TCLK; - tmp = RREG32(CG_CLKPIN_CNTL); - if (tmp & XTALIN_DIVIDE) + tmp = RREG32(mmCG_CLKPIN_CNTL); + if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK) return reference_clock / 4; return reference_clock; @@ -1519,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return 0; - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); - switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) { + switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) { case LC_LINK_WIDTH_X1: return 1; case LC_LINK_WIDTH_X2: @@ -1568,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) return; } - link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - link_width_cntl &= ~LC_LINK_WIDTH_MASK; - link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT; - link_width_cntl |= (LC_RECONFIG_NOW | - LC_RECONFIG_ARC_MISSING_ESCAPE); + link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK; + link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT; + link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK | + PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK); - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, @@ -2018,7 +2033,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = static uint32_t si_get_rev_id(struct amdgpu_device *adev) { - return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) + return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } @@ -2239,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) return; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> - LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >> + PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { DRM_INFO("PCIE gen 3 link speeds already enabled\n"); @@ -2268,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); - tmp = RREG32_PCIE(PCIE_LC_STATUS1); - max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; - current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT; + tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); + max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT; + current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT; if (current_lw < max_lw) { - tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - if (tmp & LC_RENEGOTIATION_SUPPORT) { - tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS); - tmp |= (max_lw << LC_LINK_WIDTH_SHIFT); - tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW; - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) { + tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK); + tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT); + tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp); } } @@ -2301,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL2, &gpu_cfg2); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp |= LC_REDO_EQ; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); mdelay(100); @@ -2333,16 +2348,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) (PCI_EXP_LNKCTL2_ENTER_COMP | PCI_EXP_LNKCTL2_TX_MARGIN)); - tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); - tmp &= ~LC_SET_QUIESCE; - WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp); + tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4); + tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp); } } } - speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE; - speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK; + speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) @@ -2354,13 +2369,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, tmp16); - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; - WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; + WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl); for (i = 0; i < adev->usec_timeout; i++) { - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); - if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0) + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL); + if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0) break; udelay(1); } @@ -2418,121 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - data &= ~LC_XMIT_N_FTS_MASK; - data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; + data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3); - data |= LC_GO_TO_RECOVERY; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL3, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data); - orig = data = RREG32_PCIE(PCIE_P_CNTL); - data |= P_IGNORE_EDB_ERR; + orig = data = RREG32_PCIE(ixPCIE_P_CNTL); + data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK; if (orig != data) - WREG32_PCIE(PCIE_P_CNTL, data); + WREG32_PCIE(ixPCIE_P_CNTL, data); - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK); - data |= LC_PMI_TO_L1_DIS; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (!disable_l0s) - data |= LC_L0S_INACTIVITY(7); + data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT); if (!disable_l1) { - data |= LC_L1_INACTIVITY(7); - data &= ~LC_PMI_TO_L1_DIS; + data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT); + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); if (!disable_plloff_in_l1) { bool clk_req_support; - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK); - data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK); + data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK); - data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7); + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK); + data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0); + data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1); + data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2); + data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3); + data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0); - data &= ~PLL_RAMP_UP_TIME_0_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0); + data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1); - data &= ~PLL_RAMP_UP_TIME_1_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1); + data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2); - data &= ~PLL_RAMP_UP_TIME_2_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2); + data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3); - data &= ~PLL_RAMP_UP_TIME_3_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3); + data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK; if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data); } - orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL); - data &= ~LC_DYN_LANES_PWR_STATE_MASK; - data |= LC_DYN_LANES_PWR_STATE(3); + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL); + data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK; + data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT); if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data); - orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL); + data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); + si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data); - orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); - data &= ~LS2_EXIT_TIME_MASK; + orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL); + data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK; if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) - data |= LS2_EXIT_TIME(5); + data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT); if (orig != data) - si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); + si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data); if (!disable_clkreq && !pci_is_root_bus(adev->pdev->bus)) { @@ -2548,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev) } if (clk_req_support) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2); - data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23; + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2); + data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL2, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data); - orig = data = RREG32(THM_CLK_CNTL); - data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK); - data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1); + orig = data = RREG32(mmTHM_CLK_CNTL); + data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK); + data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT); if (orig != data) - WREG32(THM_CLK_CNTL, data); + WREG32(mmTHM_CLK_CNTL, data); - orig = data = RREG32(MISC_CLK_CNTL); - data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK); - data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1); + orig = data = RREG32(mmMISC_CLK_CNTL); + data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK); + data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT); if (orig != data) - WREG32(MISC_CLK_CNTL, data); + WREG32(mmMISC_CLK_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL); - data &= ~BCLK_AS_XCLK; + orig = data = RREG32(mmCG_CLKPIN_CNTL); + data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL, data); + WREG32(mmCG_CLKPIN_CNTL, data); - orig = data = RREG32(CG_CLKPIN_CNTL_2); - data &= ~FORCE_BIF_REFCLK_EN; + orig = data = RREG32(mmCG_CLKPIN_CNTL_2); + data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK; if (orig != data) - WREG32(CG_CLKPIN_CNTL_2, data); + WREG32(mmCG_CLKPIN_CNTL_2, data); - orig = data = RREG32(MPLL_BYPASSCLK_SEL); - data &= ~MPLL_CLKOUT_SEL_MASK; - data |= MPLL_CLKOUT_SEL(4); + orig = data = RREG32(mmMPLL_BYPASSCLK_SEL); + data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK; + data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT; if (orig != data) - WREG32(MPLL_BYPASSCLK_SEL, data); + WREG32(mmMPLL_BYPASSCLK_SEL, data); - orig = data = RREG32(SPLL_CNTL_MODE); - data &= ~SPLL_REFCLK_SEL_MASK; + orig = data = RREG32(mmSPLL_CNTL_MODE); + data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK; if (orig != data) - WREG32(SPLL_CNTL_MODE, data); + WREG32(mmSPLL_CNTL_MODE, data); } } } else { if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } - orig = data = RREG32_PCIE(PCIE_CNTL2); - data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN; + orig = data = RREG32_PCIE(ixPCIE_CNTL2); + data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; if (orig != data) - WREG32_PCIE(PCIE_CNTL2, data); + WREG32_PCIE(ixPCIE_CNTL2, data); if (!disable_l0s) { - data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); - if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) { - data = RREG32_PCIE(PCIE_LC_STATUS1); - if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) { - orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL); - data &= ~LC_L0S_INACTIVITY_MASK; + data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL); + if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) { + data = RREG32_PCIE(ixPCIE_LC_STATUS1); + if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) { + orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; if (orig != data) - WREG32_PCIE_PORT(PCIE_LC_CNTL, data); + WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index e2089c8da71b..7f18e4875287 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -27,6 +27,8 @@ #include "si.h" #include "sid.h" +#include "oss/oss_1_0_d.h" +#include "oss/oss_1_0_sh_mask.h" const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { DMA0_REGISTER_OFFSET, @@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); +/** + * si_dma_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return *ring->rptr_cpu_addr; } +/** + * si_dma_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (SI). + */ static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; + return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; } static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) @@ -56,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -117,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ - rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); - rb_cntl &= ~DMA_RB_ENABLE; - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]); + rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK; + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); } } @@ -133,44 +149,44 @@ static int si_dma_start(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); - WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); + WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); rb_cntl = rb_bufsz << 1; #ifdef __BIG_ENDIAN - rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK; #endif - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(DMA_RB_RPTR + sdma_offsets[i], 0); - WREG32(DMA_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0); rptr_addr = ring->rptr_gpu_addr; - WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); - WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr)); + WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF); - rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; + rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK; - WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); + WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); /* enable DMA IBs */ - ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; + ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK; #ifdef __BIG_ENDIAN - ib_cntl |= DMA_IB_SWAP_ENABLE; + ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK; #endif - WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl); + WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]); - dma_cntl &= ~CTXEMPTY_INT_ENABLE; - WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); + dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]); + dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK; + WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); - WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); + WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK); r = amdgpu_ring_test_helper(ring); if (r) @@ -461,7 +477,7 @@ static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->sdma.num_instances = 2; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; si_dma_set_ring_funcs(adev); si_dma_set_buffer_funcs(adev); @@ -545,9 +561,9 @@ static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS2); + u32 tmp = RREG32(mmSRBM_STATUS2); - if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) + if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK)) return false; return true; @@ -583,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE0: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -599,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev, case AMDGPU_SDMA_IRQ_INSTANCE1: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl &= ~TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; case AMDGPU_IRQ_STATE_ENABLE: - sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET); - sdma_cntl |= TRAP_ENABLE; - WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); + sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET); + sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK; + WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl); break; default: break; @@ -645,11 +661,11 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data &= ~MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); - WREG32(DMA_CLK_CTRL + offset, 0x00000100); + WREG32(mmDMA_POWER_CNTL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, 0x00000100); } } else { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -657,15 +673,15 @@ static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, offset = DMA0_REGISTER_OFFSET; else offset = DMA1_REGISTER_OFFSET; - orig = data = RREG32(DMA_POWER_CNTL + offset); - data |= MEM_POWER_OVERRIDE; + orig = data = RREG32(mmDMA_POWER_CNTL + offset); + data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (data != orig) - WREG32(DMA_POWER_CNTL + offset, data); + WREG32(mmDMA_POWER_CNTL + offset, data); - orig = data = RREG32(DMA_CLK_CTRL + offset); + orig = data = RREG32(mmDMA_CLK_CTRL + offset); data = 0xff000000; if (data != orig) - WREG32(DMA_CLK_CTRL + offset, data); + WREG32(mmDMA_CLK_CTRL + offset, data); } } @@ -679,11 +695,11 @@ static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, struct amdgpu_device *adev = ip_block->adev; - WREG32(DMA_PGFSM_WRITE, 0x00002000); - WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + WREG32(mmDMA_PGFSM_WRITE, 0x00002000); + WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff); for (tmp = 0; tmp < 5; tmp++) - WREG32(DMA_PGFSM_WRITE, 0); + WREG32(mmDMA_PGFSM_WRITE, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index d656ef1fa6e1..6da65778292b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -23,115 +23,15 @@ #ifndef SI_ENUMS_H #define SI_ENUMS_H -#define VBLANK_INT_MASK (1 << 0) -#define DC_HPDx_INT_EN (1 << 16) -#define VBLANK_ACK (1 << 4) -#define VLINE_ACK (1 << 4) - -#define CURSOR_WIDTH 64 -#define CURSOR_HEIGHT 64 - -#define VGA_VSTATUS_CNTL 0xFFFCFFFF #define PRIORITY_MARK_MASK 0x7fff #define PRIORITY_OFF (1 << 16) #define PRIORITY_ALWAYS_ON (1 << 20) -#define INTERLEAVE_EN (1 << 0) - -#define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) -#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) - -#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -#define GRPH_ENDIAN_NONE 0 -#define GRPH_ENDIAN_8IN16 1 -#define GRPH_ENDIAN_8IN32 2 -#define GRPH_ENDIAN_8IN64 3 -#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -#define GRPH_RED_SEL_R 0 -#define GRPH_RED_SEL_G 1 -#define GRPH_RED_SEL_B 2 -#define GRPH_RED_SEL_A 3 -#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -#define GRPH_GREEN_SEL_G 0 -#define GRPH_GREEN_SEL_B 1 -#define GRPH_GREEN_SEL_A 2 -#define GRPH_GREEN_SEL_R 3 -#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -#define GRPH_BLUE_SEL_B 0 -#define GRPH_BLUE_SEL_A 1 -#define GRPH_BLUE_SEL_R 2 -#define GRPH_BLUE_SEL_G 3 -#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -#define GRPH_ALPHA_SEL_A 0 -#define GRPH_ALPHA_SEL_R 1 -#define GRPH_ALPHA_SEL_G 2 -#define GRPH_ALPHA_SEL_B 3 - -#define GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define GRPH_DEPTH_8BPP 0 -#define GRPH_DEPTH_16BPP 1 -#define GRPH_DEPTH_32BPP 2 - -#define GRPH_FORMAT(x) (((x) & 0x7) << 8) -#define GRPH_FORMAT_INDEXED 0 -#define GRPH_FORMAT_ARGB1555 0 -#define GRPH_FORMAT_ARGB565 1 -#define GRPH_FORMAT_ARGB4444 2 -#define GRPH_FORMAT_AI88 3 -#define GRPH_FORMAT_MONO16 4 -#define GRPH_FORMAT_BGRA5551 5 -#define GRPH_FORMAT_ARGB8888 0 -#define GRPH_FORMAT_ARGB2101010 1 -#define GRPH_FORMAT_32BPP_DIG 2 -#define GRPH_FORMAT_8B_ARGB2101010 3 -#define GRPH_FORMAT_BGRA1010102 4 -#define GRPH_FORMAT_8B_BGRA1010102 5 -#define GRPH_FORMAT_RGB111110 6 -#define GRPH_FORMAT_BGR101111 7 - -#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_ARRAY_LINEAR_GENERAL 0 -#define GRPH_ARRAY_LINEAR_ALIGNED 1 -#define GRPH_ARRAY_1D_TILED_THIN1 2 -#define GRPH_ARRAY_2D_TILED_THIN1 4 -#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24) - -#define CURSOR_EN (1 << 0) -#define CURSOR_MODE(x) (((x) & 0x3) << 8) -#define CURSOR_MONO 0 -#define CURSOR_24_1 1 -#define CURSOR_24_8_PRE_MULT 2 -#define CURSOR_24_8_UNPRE_MULT 3 -#define CURSOR_2X_MAGNIFY (1 << 16) -#define CURSOR_FORCE_MC_ON (1 << 20) -#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -#define CURSOR_URGENT_ALWAYS 0 -#define CURSOR_URGENT_1_8 1 -#define CURSOR_URGENT_1_4 2 -#define CURSOR_URGENT_3_8 3 -#define CURSOR_URGENT_1_2 4 -#define CURSOR_UPDATE_PENDING (1 << 0) -#define CURSOR_UPDATE_TAKEN (1 << 1) -#define CURSOR_UPDATE_LOCK (1 << 16) -#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) - - -#define ES_AND_GS_AUTO 3 -#define RADEON_PACKET_TYPE3 3 -#define CE_PARTITION_BASE 3 -#define BUF_SWAP_32BIT (2 << 16) #define GFX_POWER_STATUS (1 << 1) #define GFX_CLOCK_STATUS (1 << 2) #define GFX_LS_STATUS (1 << 3) -#define RLC_BUSY_STATUS (1 << 0) +#define RLC_BUSY_STATUS (1 << 0) #define RLC_PUD(x) ((x) << 0) #define RLC_PUD_MASK (0xff << 0) #define RLC_PDD(x) ((x) << 8) @@ -140,140 +40,8 @@ #define RLC_TTPD_MASK (0xff << 16) #define RLC_MSD(x) ((x) << 24) #define RLC_MSD_MASK (0xff << 24) -#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) -#define WRITE_DATA_DST_SEL(x) ((x) << 8) -#define EVENT_TYPE(x) ((x) << 0) -#define EVENT_INDEX(x) ((x) << 8) -#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) -#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) -#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) -#define GFX6_NUM_GFX_RINGS 1 -#define GFX6_NUM_COMPUTE_RINGS 2 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ - (((op) & 0xFF) << 8) | \ - ((n) & 0x3FFF) << 16) -#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) -#define PACKET3_NOP 0x10 -#define PACKET3_SET_BASE 0x11 -#define PACKET3_BASE_INDEX(x) ((x) << 0) -#define PACKET3_CLEAR_STATE 0x12 -#define PACKET3_INDEX_BUFFER_SIZE 0x13 -#define PACKET3_DISPATCH_DIRECT 0x15 -#define PACKET3_DISPATCH_INDIRECT 0x16 -#define PACKET3_ALLOC_GDS 0x1B -#define PACKET3_WRITE_GDS_RAM 0x1C -#define PACKET3_ATOMIC_GDS 0x1D -#define PACKET3_ATOMIC 0x1E -#define PACKET3_OCCLUSION_QUERY 0x1F -#define PACKET3_SET_PREDICATION 0x20 -#define PACKET3_REG_RMW 0x21 -#define PACKET3_COND_EXEC 0x22 -#define PACKET3_PRED_EXEC 0x23 -#define PACKET3_DRAW_INDIRECT 0x24 -#define PACKET3_DRAW_INDEX_INDIRECT 0x25 -#define PACKET3_INDEX_BASE 0x26 -#define PACKET3_DRAW_INDEX_2 0x27 -#define PACKET3_CONTEXT_CONTROL 0x28 -#define PACKET3_INDEX_TYPE 0x2A -#define PACKET3_DRAW_INDIRECT_MULTI 0x2C -#define PACKET3_DRAW_INDEX_AUTO 0x2D -#define PACKET3_DRAW_INDEX_IMMD 0x2E -#define PACKET3_NUM_INSTANCES 0x2F -#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 -#define PACKET3_INDIRECT_BUFFER_CONST 0x31 -#define PACKET3_INDIRECT_BUFFER 0x3F -#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 -#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 -#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 -#define PACKET3_WRITE_DATA 0x37 -#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 -#define PACKET3_MEM_SEMAPHORE 0x39 -#define PACKET3_MPEG_INDEX 0x3A -#define PACKET3_COPY_DW 0x3B -#define PACKET3_WAIT_REG_MEM 0x3C -#define PACKET3_MEM_WRITE 0x3D -#define PACKET3_COPY_DATA 0x40 -#define PACKET3_CP_DMA 0x41 -# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20) -# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27) -# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29) -# define PACKET3_CP_DMA_CP_SYNC (1 << 31) -# define PACKET3_CP_DMA_DIS_WC (1 << 21) -# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22) -# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24) -# define PACKET3_CP_DMA_CMD_SAS (1 << 26) -# define PACKET3_CP_DMA_CMD_DAS (1 << 27) -# define PACKET3_CP_DMA_CMD_SAIC (1 << 28) -# define PACKET3_CP_DMA_CMD_DAIC (1 << 29) -# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_PFP_SYNC_ME 0x42 -#define PACKET3_SURFACE_SYNC 0x43 -# define PACKET3_DEST_BASE_0_ENA (1 << 0) -# define PACKET3_DEST_BASE_1_ENA (1 << 1) -# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) -# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) -# define PACKET3_CB2_DEST_BASE_ENA (1 << 8) -# define PACKET3_CB3_DEST_BASE_ENA (1 << 9) -# define PACKET3_CB4_DEST_BASE_ENA (1 << 10) -# define PACKET3_CB5_DEST_BASE_ENA (1 << 11) -# define PACKET3_CB6_DEST_BASE_ENA (1 << 12) -# define PACKET3_CB7_DEST_BASE_ENA (1 << 13) -# define PACKET3_DB_DEST_BASE_ENA (1 << 14) -# define PACKET3_DEST_BASE_2_ENA (1 << 19) -# define PACKET3_DEST_BASE_3_ENA (1 << 21) -# define PACKET3_TCL1_ACTION_ENA (1 << 22) -# define PACKET3_TC_ACTION_ENA (1 << 23) -# define PACKET3_CB_ACTION_ENA (1 << 25) -# define PACKET3_DB_ACTION_ENA (1 << 26) -# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27) -# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) -#define PACKET3_ME_INITIALIZE 0x44 -#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) -#define PACKET3_COND_WRITE 0x45 -#define PACKET3_EVENT_WRITE 0x46 -#define PACKET3_EVENT_WRITE_EOP 0x47 -#define PACKET3_EVENT_WRITE_EOS 0x48 -#define PACKET3_PREAMBLE_CNTL 0x4A -# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) -# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) -#define PACKET3_ONE_REG_WRITE 0x57 -#define PACKET3_LOAD_CONFIG_REG 0x5F -#define PACKET3_LOAD_CONTEXT_REG 0x60 -#define PACKET3_LOAD_SH_REG 0x61 -#define PACKET3_SET_CONFIG_REG 0x68 -#define PACKET3_SET_CONFIG_REG_START 0x00002000 -#define PACKET3_SET_CONFIG_REG_END 0x00002c00 -#define PACKET3_SET_CONTEXT_REG 0x69 -#define PACKET3_SET_CONTEXT_REG_START 0x000a000 -#define PACKET3_SET_CONTEXT_REG_END 0x000a400 -#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 -#define PACKET3_SET_RESOURCE_INDIRECT 0x74 -#define PACKET3_SET_SH_REG 0x76 -#define PACKET3_SET_SH_REG_START 0x00002c00 -#define PACKET3_SET_SH_REG_END 0x00003000 -#define PACKET3_SET_SH_REG_OFFSET 0x77 -#define PACKET3_ME_WRITE 0x7A -#define PACKET3_SCRATCH_RAM_WRITE 0x7D -#define PACKET3_SCRATCH_RAM_READ 0x7E -#define PACKET3_CE_WRITE 0x7F -#define PACKET3_LOAD_CONST_RAM 0x80 -#define PACKET3_WRITE_CONST_RAM 0x81 -#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82 -#define PACKET3_DUMP_CONST_RAM 0x83 -#define PACKET3_INCREMENT_CE_COUNTER 0x84 -#define PACKET3_INCREMENT_DE_COUNTER 0x85 -#define PACKET3_WAIT_ON_CE_COUNTER 0x86 -#define PACKET3_WAIT_ON_DE_COUNTER 0x87 -#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 -#define PACKET3_SET_CE_DE_COUNTERS 0x89 -#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A -#define PACKET3_SWITCH_BUFFER 0x8B -#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) -#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) -#define PACKET3_SEM_SEL_WAIT (0x7 << 29) - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5c38e1fb1dca..1df00f8a2406 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -27,6 +27,7 @@ #include "amdgpu_ih.h" #include "sid.h" #include "si_ih.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" @@ -213,7 +214,7 @@ static int si_ih_resume(struct amdgpu_ip_block *ip_block) static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) return false; @@ -239,23 +240,23 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; - u32 tmp = RREG32(SRBM_STATUS); + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; if (srbm_soft_reset) { - tmp = RREG32(SRBM_SOFT_RESET); + tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); tmp &= ~srbm_soft_reset; - WREG32(SRBM_SOFT_RESET, tmp); - tmp = RREG32(SRBM_SOFT_RESET); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); udelay(50); } diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index cbf232f5235b..cbd4f8951cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -24,43 +24,12 @@ #ifndef SI_H #define SI_H -#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 - -#define SI_MAX_SH_GPRS 256 -#define SI_MAX_TEMP_GPRS 16 -#define SI_MAX_SH_THREADS 256 -#define SI_MAX_SH_STACK_ENTRIES 4096 -#define SI_MAX_FRC_EOV_CNT 16384 -#define SI_MAX_BACKENDS 8 -#define SI_MAX_BACKENDS_MASK 0xFF -#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F -#define SI_MAX_SIMDS 12 -#define SI_MAX_SIMDS_MASK 0x0FFF -#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF -#define SI_MAX_PIPES 8 -#define SI_MAX_PIPES_MASK 0xFF -#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F -#define SI_MAX_LDS_NUM 0xFFFF -#define SI_MAX_TCC 16 -#define SI_MAX_TCC_MASK 0xFFFF #define SI_MAX_CTLACKS_ASSERTION_WAIT 100 -/* SMC IND accessor regs */ -#define SMC_IND_INDEX_0 0x80 -#define SMC_IND_DATA_0 0x81 - -#define SMC_IND_ACCESS_CNTL 0x8A -# define AUTO_INCREMENT_IND_0 (1 << 0) -#define SMC_MESSAGE_0 0x8B -#define SMC_RESP_0 0x8C - /* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define CG_CGTT_LOCAL_0 0x400 -#define CG_CGTT_LOCAL_1 0x401 - /* SMC IND registers */ #define SMC_SYSCON_RESET_CNTL 0x80000000 # define RST_REG (1 << 0) @@ -68,9 +37,6 @@ # define CK_DISABLE (1 << 0) # define CKEN (1 << 24) -#define VGA_HDP_CONTROL 0xCA -#define VGA_MEMORY_DISABLE (1 << 4) - #define DCCG_DISP_SLOW_SELECT_REG 0x13F #define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0) #define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0) @@ -79,47 +45,6 @@ #define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4) #define DCCG_DISP2_SLOW_SELECT_SHIFT 4 -#define CG_SPLL_FUNC_CNTL 0x180 -#define SPLL_RESET (1 << 0) -#define SPLL_SLEEP (1 << 1) -#define SPLL_BYPASS_EN (1 << 3) -#define SPLL_REF_DIV(x) ((x) << 4) -#define SPLL_REF_DIV_MASK (0x3f << 4) -#define SPLL_PDIV_A(x) ((x) << 20) -#define SPLL_PDIV_A_MASK (0x7f << 20) -#define SPLL_PDIV_A_SHIFT 20 -#define CG_SPLL_FUNC_CNTL_2 0x181 -#define SCLK_MUX_SEL(x) ((x) << 0) -#define SCLK_MUX_SEL_MASK (0x1ff << 0) -#define SPLL_CTLREQ_CHG (1 << 23) -#define SCLK_MUX_UPDATE (1 << 26) -#define CG_SPLL_FUNC_CNTL_3 0x182 -#define SPLL_FB_DIV(x) ((x) << 0) -#define SPLL_FB_DIV_MASK (0x3ffffff << 0) -#define SPLL_FB_DIV_SHIFT 0 -#define SPLL_DITHEN (1 << 28) -#define CG_SPLL_FUNC_CNTL_4 0x183 - -#define SPLL_STATUS 0x185 -#define SPLL_CHG_STATUS (1 << 1) -#define SPLL_CNTL_MODE 0x186 -#define SPLL_SW_DIR_CONTROL (1 << 0) -# define SPLL_REFCLK_SEL(x) ((x) << 26) -# define SPLL_REFCLK_SEL_MASK (3 << 26) - -#define CG_SPLL_SPREAD_SPECTRUM 0x188 -#define SSEN (1 << 0) -#define CLK_S(x) ((x) << 4) -#define CLK_S_MASK (0xfff << 4) -#define CLK_S_SHIFT 4 -#define CG_SPLL_SPREAD_SPECTRUM_2 0x189 -#define CLK_V(x) ((x) << 0) -#define CLK_V_MASK (0x3ffffff << 0) -#define CLK_V_SHIFT 0 - -#define CG_SPLL_AUTOSCALE_CNTL 0x18b -# define AUTOSCALE_ON_SS_CLEAR (1 << 9) - /* discrete uvd clocks */ #define CG_UPLL_FUNC_CNTL 0x18d # define UPLL_RESET_MASK 0x00000001 @@ -149,317 +74,13 @@ #define CG_UPLL_SPREAD_SPECTRUM 0x194 # define SSEN_MASK 0x00000001 -#define MPLL_BYPASSCLK_SEL 0x197 -# define MPLL_CLKOUT_SEL(x) ((x) << 8) -# define MPLL_CLKOUT_SEL_MASK 0xFF00 - -#define CG_CLKPIN_CNTL 0x198 -# define XTALIN_DIVIDE (1 << 1) -# define BCLK_AS_XCLK (1 << 2) -#define CG_CLKPIN_CNTL_2 0x199 -# define FORCE_BIF_REFCLK_EN (1 << 3) -# define MUX_TCLK_TO_XCLK (1 << 8) - -#define THM_CLK_CNTL 0x19b -# define CMON_CLK_SEL(x) ((x) << 0) -# define CMON_CLK_SEL_MASK 0xFF -# define TMON_CLK_SEL(x) ((x) << 8) -# define TMON_CLK_SEL_MASK 0xFF00 -#define MISC_CLK_CNTL 0x19c -# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0) -# define DEEP_SLEEP_CLK_SEL_MASK 0xFF -# define ZCLK_SEL(x) ((x) << 8) -# define ZCLK_SEL_MASK 0xFF00 - -#define CG_THERMAL_CTRL 0x1c0 -#define DPM_EVENT_SRC(x) ((x) << 0) -#define DPM_EVENT_SRC_MASK (7 << 0) -#define DIG_THERM_DPM(x) ((x) << 14) -#define DIG_THERM_DPM_MASK 0x003FC000 -#define DIG_THERM_DPM_SHIFT 14 -#define CG_THERMAL_STATUS 0x1c1 -#define FDO_PWM_DUTY(x) ((x) << 9) -#define FDO_PWM_DUTY_MASK (0xff << 9) -#define FDO_PWM_DUTY_SHIFT 9 -#define CG_THERMAL_INT 0x1c2 -#define DIG_THERM_INTH(x) ((x) << 8) -#define DIG_THERM_INTH_MASK 0x0000FF00 -#define DIG_THERM_INTH_SHIFT 8 -#define DIG_THERM_INTL(x) ((x) << 16) -#define DIG_THERM_INTL_MASK 0x00FF0000 -#define DIG_THERM_INTL_SHIFT 16 -#define THERM_INT_MASK_HIGH (1 << 24) -#define THERM_INT_MASK_LOW (1 << 25) - -#define CG_MULT_THERMAL_CTRL 0x1c4 -#define TEMP_SEL(x) ((x) << 20) -#define TEMP_SEL_MASK (0xff << 20) -#define TEMP_SEL_SHIFT 20 -#define CG_MULT_THERMAL_STATUS 0x1c5 -#define ASIC_MAX_TEMP(x) ((x) << 0) -#define ASIC_MAX_TEMP_MASK 0x000001ff -#define ASIC_MAX_TEMP_SHIFT 0 -#define CTF_TEMP(x) ((x) << 9) -#define CTF_TEMP_MASK 0x0003fe00 -#define CTF_TEMP_SHIFT 9 - -#define CG_FDO_CTRL0 0x1d5 -#define FDO_STATIC_DUTY(x) ((x) << 0) -#define FDO_STATIC_DUTY_MASK 0x000000FF -#define FDO_STATIC_DUTY_SHIFT 0 -#define CG_FDO_CTRL1 0x1d6 -#define FMAX_DUTY100(x) ((x) << 0) -#define FMAX_DUTY100_MASK 0x000000FF -#define FMAX_DUTY100_SHIFT 0 -#define CG_FDO_CTRL2 0x1d7 -#define TMIN(x) ((x) << 0) -#define TMIN_MASK 0x000000FF -#define TMIN_SHIFT 0 -#define FDO_PWM_MODE(x) ((x) << 11) -#define FDO_PWM_MODE_MASK (7 << 11) -#define FDO_PWM_MODE_SHIFT 11 -#define TACH_PWM_RESP_RATE(x) ((x) << 25) -#define TACH_PWM_RESP_RATE_MASK (0x7f << 25) -#define TACH_PWM_RESP_RATE_SHIFT 25 - -#define CG_TACH_CTRL 0x1dc -# define EDGE_PER_REV(x) ((x) << 0) -# define EDGE_PER_REV_MASK (0x7 << 0) -# define EDGE_PER_REV_SHIFT 0 -# define TARGET_PERIOD(x) ((x) << 3) -# define TARGET_PERIOD_MASK 0xfffffff8 -# define TARGET_PERIOD_SHIFT 3 -#define CG_TACH_STATUS 0x1dd -# define TACH_PERIOD(x) ((x) << 0) -# define TACH_PERIOD_MASK 0xffffffff -# define TACH_PERIOD_SHIFT 0 - -#define GENERAL_PWRMGT 0x1e0 -# define GLOBAL_PWRMGT_EN (1 << 0) -# define STATIC_PM_EN (1 << 1) -# define THERMAL_PROTECTION_DIS (1 << 2) -# define THERMAL_PROTECTION_TYPE (1 << 3) -# define SW_SMIO_INDEX(x) ((x) << 6) -# define SW_SMIO_INDEX_MASK (1 << 6) -# define SW_SMIO_INDEX_SHIFT 6 -# define VOLT_PWRMGT_EN (1 << 10) -# define DYN_SPREAD_SPECTRUM_EN (1 << 23) -#define CG_TPC 0x1e1 -#define SCLK_PWRMGT_CNTL 0x1e2 -# define SCLK_PWRMGT_OFF (1 << 0) -# define SCLK_LOW_D1 (1 << 1) -# define FIR_RESET (1 << 4) -# define FIR_FORCE_TREND_SEL (1 << 5) -# define FIR_TREND_MODE (1 << 6) -# define DYN_GFX_CLK_OFF_EN (1 << 7) -# define GFX_CLK_FORCE_ON (1 << 8) -# define GFX_CLK_REQUEST_OFF (1 << 9) -# define GFX_CLK_FORCE_OFF (1 << 10) -# define GFX_CLK_OFF_ACPI_D1 (1 << 11) -# define GFX_CLK_OFF_ACPI_D2 (1 << 12) -# define GFX_CLK_OFF_ACPI_D3 (1 << 13) -# define DYN_LIGHT_SLEEP_EN (1 << 14) - -#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6 -# define CURRENT_STATE_INDEX_MASK (0xf << 4) -# define CURRENT_STATE_INDEX_SHIFT 4 - -#define CG_FTV 0x1ef - -#define CG_FFCT_0 0x1f0 -# define UTC_0(x) ((x) << 0) -# define UTC_0_MASK (0x3ff << 0) -# define DTC_0(x) ((x) << 10) -# define DTC_0_MASK (0x3ff << 10) - -#define CG_BSP 0x1ff -# define BSP(x) ((x) << 0) -# define BSP_MASK (0xffff << 0) -# define BSU(x) ((x) << 16) -# define BSU_MASK (0xf << 16) -#define CG_AT 0x200 -# define CG_R(x) ((x) << 0) -# define CG_R_MASK (0xffff << 0) -# define CG_L(x) ((x) << 16) -# define CG_L_MASK (0xffff << 16) - -#define CG_GIT 0x201 -# define CG_GICST(x) ((x) << 0) -# define CG_GICST_MASK (0xffff << 0) -# define CG_GIPOT(x) ((x) << 16) -# define CG_GIPOT_MASK (0xffff << 16) - -#define CG_SSP 0x203 -# define SST(x) ((x) << 0) -# define SST_MASK (0xffff << 0) -# define SSTU(x) ((x) << 16) -# define SSTU_MASK (0xf << 16) - -#define CG_DISPLAY_GAP_CNTL 0x20a -# define DISP1_GAP(x) ((x) << 0) -# define DISP1_GAP_MASK (3 << 0) -# define DISP2_GAP(x) ((x) << 2) -# define DISP2_GAP_MASK (3 << 2) -# define VBI_TIMER_COUNT(x) ((x) << 4) -# define VBI_TIMER_COUNT_MASK (0x3fff << 4) -# define VBI_TIMER_UNIT(x) ((x) << 20) -# define VBI_TIMER_UNIT_MASK (7 << 20) -# define DISP1_GAP_MCHG(x) ((x) << 24) -# define DISP1_GAP_MCHG_MASK (3 << 24) -# define DISP2_GAP_MCHG(x) ((x) << 26) -# define DISP2_GAP_MCHG_MASK (3 << 26) - -#define CG_ULV_CONTROL 0x21e -#define CG_ULV_PARAMETER 0x21f - -#define SMC_SCRATCH0 0x221 - -#define CG_CAC_CTRL 0x22e -# define CAC_WINDOW(x) ((x) << 0) -# define CAC_WINDOW_MASK 0x00ffffff - -#define DMIF_ADDR_CONFIG 0x2F5 - -#define DMIF_ADDR_CALC 0x300 - -#define PIPE0_DMIF_BUFFER_CONTROL 0x0328 -# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0) -# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4) - -#define SRBM_STATUS 0x394 -#define GRBM_RQ_PENDING (1 << 5) -#define VMC_BUSY (1 << 8) -#define MCB_BUSY (1 << 9) -#define MCB_NON_DISPLAY_BUSY (1 << 10) -#define MCC_BUSY (1 << 11) -#define MCD_BUSY (1 << 12) -#define SEM_BUSY (1 << 14) -#define IH_BUSY (1 << 17) - -#define SRBM_SOFT_RESET 0x398 -#define SOFT_RESET_BIF (1 << 1) -#define SOFT_RESET_DC (1 << 5) -#define SOFT_RESET_DMA1 (1 << 6) -#define SOFT_RESET_GRBM (1 << 8) -#define SOFT_RESET_HDP (1 << 9) -#define SOFT_RESET_IH (1 << 10) -#define SOFT_RESET_MC (1 << 11) -#define SOFT_RESET_ROM (1 << 14) -#define SOFT_RESET_SEM (1 << 15) -#define SOFT_RESET_VMC (1 << 17) -#define SOFT_RESET_DMA (1 << 20) -#define SOFT_RESET_TST (1 << 21) -#define SOFT_RESET_REGBB (1 << 22) -#define SOFT_RESET_ORB (1 << 23) - -#define CC_SYS_RB_BACKEND_DISABLE 0x3A0 -#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1 - -#define SRBM_READ_ERROR 0x3A6 -#define SRBM_INT_CNTL 0x3A8 -#define SRBM_INT_ACK 0x3AA - -#define SRBM_STATUS2 0x3B1 -#define DMA_BUSY (1 << 5) -#define DMA1_BUSY (1 << 6) - -#define VM_L2_CNTL 0x500 -#define ENABLE_L2_CACHE (1 << 0) -#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1) -#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2) -#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4) -#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9) -#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10) -#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15) -#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19) -#define VM_L2_CNTL2 0x501 -#define INVALIDATE_ALL_L1_TLBS (1 << 0) -#define INVALIDATE_L2_CACHE (1 << 1) -#define INVALIDATE_CACHE_MODE(x) ((x) << 26) -#define INVALIDATE_PTE_AND_PDE_CACHES 0 -#define INVALIDATE_ONLY_PTE_CACHES 1 -#define INVALIDATE_ONLY_PDE_CACHES 2 -#define VM_L2_CNTL3 0x502 -#define BANK_SELECT(x) ((x) << 0) -#define L2_CACHE_UPDATE_MODE(x) ((x) << 6) -#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15) -#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20) -#define VM_L2_STATUS 0x503 -#define L2_BUSY (1 << 0) -#define VM_CONTEXT0_CNTL 0x504 -#define ENABLE_CONTEXT (1 << 0) -#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) -#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3) -#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6) -#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7) -#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9) -#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10) -#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12) -#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13) -#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15) -#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16) -#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18) -#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19) -#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24) -#define VM_CONTEXT1_CNTL 0x505 -#define VM_CONTEXT0_CNTL2 0x50C -#define VM_CONTEXT1_CNTL2 0x50D -#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E -#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F -#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510 -#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511 -#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512 -#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513 -#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514 -#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515 - -#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f -#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537 -#define PROTECTIONS_MASK (0xf << 0) -#define PROTECTIONS_SHIFT 0 - /* bit 0: range - * bit 1: pde0 - * bit 2: valid - * bit 3: read - * bit 4: write - */ -#define MEMORY_CLIENT_ID_MASK (0xff << 12) -#define MEMORY_CLIENT_ID_SHIFT 12 -#define MEMORY_CLIENT_RW_MASK (1 << 24) -#define MEMORY_CLIENT_RW_SHIFT 24 -#define FAULT_VMID_MASK (0xf << 25) -#define FAULT_VMID_SHIFT 25 - #define VM_INVALIDATE_REQUEST 0x51E #define VM_INVALIDATE_RESPONSE 0x51F -#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546 -#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547 - -#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F -#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550 -#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551 -#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552 -#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553 -#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554 -#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555 -#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556 -#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557 -#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558 - -#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F -#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560 - #define VM_L2_CG 0x570 #define MC_CG_ENABLE (1 << 18) #define MC_LS_ENABLE (1 << 19) -#define MC_SHARED_CHMAP 0x801 -#define NOOFCHAN_SHIFT 12 -#define NOOFCHAN_MASK 0x0000f000 -#define MC_SHARED_CHREMAP 0x802 - #define MC_VM_FB_LOCATION 0x809 #define MC_VM_AGP_TOP 0x80A #define MC_VM_AGP_BOT 0x80B @@ -491,21 +112,6 @@ #define MC_CITF_MISC_WR_CG 0x993 #define MC_CITF_MISC_VM_CG 0x994 -#define MC_ARB_RAMCFG 0x9D8 -#define NOOFBANK_SHIFT 0 -#define NOOFBANK_MASK 0x00000003 -#define NOOFRANK_SHIFT 2 -#define NOOFRANK_MASK 0x00000004 -#define NOOFROWS_SHIFT 3 -#define NOOFROWS_MASK 0x00000038 -#define NOOFCOLS_SHIFT 6 -#define NOOFCOLS_MASK 0x000000C0 -#define CHANSIZE_SHIFT 8 -#define CHANSIZE_MASK 0x00000100 -#define CHANSIZE_OVERRIDE (1 << 11) -#define NOOFGROUPS_SHIFT 12 -#define NOOFGROUPS_MASK 0x00001000 - #define MC_ARB_DRAM_TIMING 0x9DD #define MC_ARB_DRAM_TIMING2 0x9DE @@ -631,20 +237,6 @@ #define CLKS(x) ((x) << 0) #define CLKS_MASK (0xfff << 0) -#define HDP_HOST_PATH_CNTL 0xB00 -#define CLOCK_GATING_DIS (1 << 23) -#define HDP_NONSURFACE_BASE 0xB01 -#define HDP_NONSURFACE_INFO 0xB02 -#define HDP_NONSURFACE_SIZE 0xB03 - -#define HDP_DEBUG0 0xBCC - -#define HDP_ADDR_CONFIG 0xBD2 -#define HDP_MISC_CNTL 0xBD3 -#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0) -#define HDP_MEM_POWER_LS 0xBD4 -#define HDP_LS_ENABLE (1 << 0) - #define ATC_MISC_CG 0xCD4 #define IH_RB_CNTL 0xF80 @@ -674,8 +266,6 @@ # define MC_WR_CLEAN_CNT(x) ((x) << 20) # define MC_VMID(x) ((x) << 25) -#define CONFIG_MEMSIZE 0x150A - #define INTERRUPT_CNTL 0x151A # define IH_DUMMY_RD_OVERRIDE (1 << 0) # define IH_DUMMY_RD_EN (1 << 1) @@ -683,486 +273,22 @@ # define GEN_IH_INT_EN (1 << 8) #define INTERRUPT_CNTL2 0x151B -#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520 - -#define BIF_FB_EN 0x1524 -#define FB_READ_EN (1 << 0) -#define FB_WRITE_EN (1 << 1) - -#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 - -/* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */ -#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */ -# define MAX_CHANNELS(x) (((x) & 0x7) << 0) -/* max channels minus one. 7 = 8 channels */ -# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8) -# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16) -# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */ -/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO - * bit0 = 32 kHz - * bit1 = 44.1 kHz - * bit2 = 48 kHz - * bit3 = 88.2 kHz - * bit4 = 96 kHz - * bit5 = 176.4 kHz - * bit6 = 192 kHz - */ - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37 -# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0) -# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8) -/* VIDEO_LIPSYNC, AUDIO_LIPSYNC - * 0 = invalid - * x = legal delay value - * 255 = sync not supported - */ -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38 -# define HBR_CAPABLE (1 << 0) /* enabled by default */ - -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a -# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0) -# define PRODUCT_ID(x) (((x) & 0xffff) << 16) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b -# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c -# define PORT_ID0(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d -# define PORT_ID1(x) (((x) & 0xffffffff) << 0) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e -# define DESCRIPTION0(x) (((x) & 0xff) << 0) -# define DESCRIPTION1(x) (((x) & 0xff) << 8) -# define DESCRIPTION2(x) (((x) & 0xff) << 16) -# define DESCRIPTION3(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f -# define DESCRIPTION4(x) (((x) & 0xff) << 0) -# define DESCRIPTION5(x) (((x) & 0xff) << 8) -# define DESCRIPTION6(x) (((x) & 0xff) << 16) -# define DESCRIPTION7(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40 -# define DESCRIPTION8(x) (((x) & 0xff) << 0) -# define DESCRIPTION9(x) (((x) & 0xff) << 8) -# define DESCRIPTION10(x) (((x) & 0xff) << 16) -# define DESCRIPTION11(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41 -# define DESCRIPTION12(x) (((x) & 0xff) << 0) -# define DESCRIPTION13(x) (((x) & 0xff) << 8) -# define DESCRIPTION14(x) (((x) & 0xff) << 16) -# define DESCRIPTION15(x) (((x) & 0xff) << 24) -#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42 -# define DESCRIPTION16(x) (((x) & 0xff) << 0) -# define DESCRIPTION17(x) (((x) & 0xff) << 8) - -#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54 -# define AUDIO_ENABLED (1 << 31) - -#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56 -#define PORT_CONNECTIVITY_MASK (3 << 30) -#define PORT_CONNECTIVITY_SHIFT 30 - -#define DC_LB_MEMORY_SPLIT 0x1AC3 -#define DC_LB_MEMORY_CONFIG(x) ((x) << 20) - -#define PRIORITY_A_CNT 0x1AC6 -#define PRIORITY_MARK_MASK 0x7fff -#define PRIORITY_OFF (1 << 16) -#define PRIORITY_ALWAYS_ON (1 << 20) -#define PRIORITY_B_CNT 0x1AC7 - -#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32 -# define LATENCY_WATERMARK_MASK(x) ((x) << 16) -#define DPG_PIPE_LATENCY_CONTROL 0x1B33 -# define LATENCY_LOW_WATERMARK(x) ((x) << 0) -# define LATENCY_HIGH_WATERMARK(x) ((x) << 16) - -/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */ -#define VLINE_STATUS 0x1AEE -# define VLINE_OCCURRED (1 << 0) -# define VLINE_ACK (1 << 4) -# define VLINE_STAT (1 << 12) -# define VLINE_INTERRUPT (1 << 16) -# define VLINE_INTERRUPT_TYPE (1 << 17) -/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */ -#define VBLANK_STATUS 0x1AEF -# define VBLANK_OCCURRED (1 << 0) -# define VBLANK_ACK (1 << 4) -# define VBLANK_STAT (1 << 12) -# define VBLANK_INTERRUPT (1 << 16) -# define VBLANK_INTERRUPT_TYPE (1 << 17) - -/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */ -#define INT_MASK 0x1AD0 -# define VBLANK_INT_MASK (1 << 0) -# define VLINE_INT_MASK (1 << 4) - -#define DISP_INTERRUPT_STATUS 0x183D -# define LB_D1_VLINE_INTERRUPT (1 << 2) -# define LB_D1_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD1_INTERRUPT (1 << 17) -# define DC_HPD1_RX_INTERRUPT (1 << 18) -# define DACA_AUTODETECT_INTERRUPT (1 << 22) -# define DACB_AUTODETECT_INTERRUPT (1 << 23) -# define DC_I2C_SW_DONE_INTERRUPT (1 << 24) -# define DC_I2C_HW_DONE_INTERRUPT (1 << 25) -#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E -# define LB_D2_VLINE_INTERRUPT (1 << 2) -# define LB_D2_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD2_INTERRUPT (1 << 17) -# define DC_HPD2_RX_INTERRUPT (1 << 18) -# define DISP_TIMER_INTERRUPT (1 << 24) -#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F -# define LB_D3_VLINE_INTERRUPT (1 << 2) -# define LB_D3_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD3_INTERRUPT (1 << 17) -# define DC_HPD3_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840 -# define LB_D4_VLINE_INTERRUPT (1 << 2) -# define LB_D4_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD4_INTERRUPT (1 << 17) -# define DC_HPD4_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853 -# define LB_D5_VLINE_INTERRUPT (1 << 2) -# define LB_D5_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD5_INTERRUPT (1 << 17) -# define DC_HPD5_RX_INTERRUPT (1 << 18) -#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854 -# define LB_D6_VLINE_INTERRUPT (1 << 2) -# define LB_D6_VBLANK_INTERRUPT (1 << 3) -# define DC_HPD6_INTERRUPT (1 << 17) -# define DC_HPD6_RX_INTERRUPT (1 << 18) - -/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ -#define GRPH_INT_STATUS 0x1A16 -# define GRPH_PFLIP_INT_OCCURRED (1 << 0) -# define GRPH_PFLIP_INT_CLEAR (1 << 8) -/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ -#define GRPH_INT_CONTROL 0x1A17 -# define GRPH_PFLIP_INT_MASK (1 << 0) -# define GRPH_PFLIP_INT_TYPE (1 << 8) - -#define DAC_AUTODETECT_INT_CONTROL 0x19F2 - -#define DC_HPD1_INT_STATUS 0x1807 -#define DC_HPD2_INT_STATUS 0x180A -#define DC_HPD3_INT_STATUS 0x180D -#define DC_HPD4_INT_STATUS 0x1810 -#define DC_HPD5_INT_STATUS 0x1813 -#define DC_HPD6_INT_STATUS 0x1816 -# define DC_HPDx_INT_STATUS (1 << 0) -# define DC_HPDx_SENSE (1 << 1) -# define DC_HPDx_RX_INT_STATUS (1 << 8) - -#define DC_HPD1_INT_CONTROL 0x1808 -#define DC_HPD2_INT_CONTROL 0x180B -#define DC_HPD3_INT_CONTROL 0x180E -#define DC_HPD4_INT_CONTROL 0x1811 -#define DC_HPD5_INT_CONTROL 0x1814 -#define DC_HPD6_INT_CONTROL 0x1817 -# define DC_HPDx_INT_ACK (1 << 0) -# define DC_HPDx_INT_POLARITY (1 << 8) -# define DC_HPDx_INT_EN (1 << 16) -# define DC_HPDx_RX_INT_ACK (1 << 20) -# define DC_HPDx_RX_INT_EN (1 << 24) - -#define DC_HPD1_CONTROL 0x1809 -#define DC_HPD2_CONTROL 0x180C -#define DC_HPD3_CONTROL 0x180F -#define DC_HPD4_CONTROL 0x1812 -#define DC_HPD5_CONTROL 0x1815 -#define DC_HPD6_CONTROL 0x1818 -# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0) -# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) -# define DC_HPDx_EN (1 << 28) - -#define DPG_PIPE_STUTTER_CONTROL 0x1B35 -# define STUTTER_ENABLE (1 << 0) - -/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */ -#define CRTC_STATUS_FRAME_COUNT 0x1BA6 - -/* Audio clocks */ -#define DCCG_AUDIO_DTO0_PHASE 0x05b0 -#define DCCG_AUDIO_DTO0_MODULE 0x05b4 -#define DCCG_AUDIO_DTO1_PHASE 0x05c0 -#define DCCG_AUDIO_DTO1_MODULE 0x05c4 - -#define GRBM_CNTL 0x2000 -#define GRBM_READ_TIMEOUT(x) ((x) << 0) - -#define GRBM_STATUS2 0x2002 -#define RLC_RQ_PENDING (1 << 0) -#define RLC_BUSY (1 << 8) -#define TC_BUSY (1 << 9) - -#define GRBM_STATUS 0x2004 -#define CMDFIFO_AVAIL_MASK 0x0000000F -#define RING2_RQ_PENDING (1 << 4) -#define SRBM_RQ_PENDING (1 << 5) -#define RING1_RQ_PENDING (1 << 6) -#define CF_RQ_PENDING (1 << 7) -#define PF_RQ_PENDING (1 << 8) -#define GDS_DMA_RQ_PENDING (1 << 9) -#define GRBM_EE_BUSY (1 << 10) -#define DB_CLEAN (1 << 12) -#define CB_CLEAN (1 << 13) -#define TA_BUSY (1 << 14) -#define GDS_BUSY (1 << 15) -#define VGT_BUSY (1 << 17) -#define IA_BUSY_NO_DMA (1 << 18) -#define IA_BUSY (1 << 19) -#define SX_BUSY (1 << 20) -#define SPI_BUSY (1 << 22) -#define BCI_BUSY (1 << 23) -#define SC_BUSY (1 << 24) -#define PA_BUSY (1 << 25) -#define DB_BUSY (1 << 26) -#define CP_COHERENCY_BUSY (1 << 28) -#define CP_BUSY (1 << 29) -#define CB_BUSY (1 << 30) -#define GUI_ACTIVE (1 << 31) -#define GRBM_STATUS_SE0 0x2005 -#define GRBM_STATUS_SE1 0x2006 -#define SE_DB_CLEAN (1 << 1) -#define SE_CB_CLEAN (1 << 2) -#define SE_BCI_BUSY (1 << 22) -#define SE_VGT_BUSY (1 << 23) -#define SE_PA_BUSY (1 << 24) -#define SE_TA_BUSY (1 << 25) -#define SE_SX_BUSY (1 << 26) -#define SE_SPI_BUSY (1 << 27) -#define SE_SC_BUSY (1 << 29) -#define SE_DB_BUSY (1 << 30) -#define SE_CB_BUSY (1 << 31) - -#define GRBM_INT_CNTL 0x2018 -# define RDERR_INT_ENABLE (1 << 0) -# define GUI_IDLE_INT_ENABLE (1 << 19) - -#define CP_STRMOUT_CNTL 0x213F -#define SCRATCH_REG0 0x2140 -#define SCRATCH_REG1 0x2141 -#define SCRATCH_REG2 0x2142 -#define SCRATCH_REG3 0x2143 -#define SCRATCH_REG4 0x2144 -#define SCRATCH_REG5 0x2145 -#define SCRATCH_REG6 0x2146 -#define SCRATCH_REG7 0x2147 - -#define SCRATCH_UMSK 0x2150 -#define SCRATCH_ADDR 0x2151 - -#define CP_SEM_WAIT_TIMER 0x216F - -#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172 - -#define CP_ME_CNTL 0x21B6 -#define CP_CE_HALT (1 << 24) -#define CP_PFP_HALT (1 << 26) -#define CP_ME_HALT (1 << 28) - -#define CP_COHER_CNTL2 0x217A - -#define CP_RB2_RPTR 0x21BE -#define CP_RB1_RPTR 0x21BF -#define CP_RB0_RPTR 0x21C0 -#define CP_RB_WPTR_DELAY 0x21C1 - -#define CP_QUEUE_THRESHOLDS 0x21D8 -#define ROQ_IB1_START(x) ((x) << 0) -#define ROQ_IB2_START(x) ((x) << 8) -#define CP_MEQ_THRESHOLDS 0x21D9 -#define MEQ1_START(x) ((x) << 0) -#define MEQ2_START(x) ((x) << 8) - -#define CP_PERFMON_CNTL 0x21FF - #define VGT_VTX_VECT_EJECT_REG 0x222C - #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 - #define VGT_GS_VERTEX_REUSE 0x2235 - #define VGT_PRIMITIVE_TYPE 0x2256 #define VGT_INDEX_TYPE 0x2257 - #define VGT_NUM_INDICES 0x225C #define VGT_NUM_INSTANCES 0x225D - #define VGT_TF_RING_SIZE 0x2262 - #define VGT_HS_OFFCHIP_PARAM 0x226C - #define VGT_TF_MEMORY_BASE 0x226E -#define PA_CL_ENHANCE 0x2285 -#define CLIP_VTX_REORDER_ENA (1 << 0) -#define NUM_CLIP_SEQ(x) ((x) << 1) - -#define PA_SU_LINE_STIPPLE_VALUE 0x2298 - -#define PA_SC_LINE_STIPPLE_STATE 0x22C4 - -#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9 -#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0) -#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16) - -#define PA_SC_FIFO_SIZE 0x22F3 -#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0) -#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6) -#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15) -#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23) - #define PA_SC_ENHANCE 0x22FC -#define SQ_CONFIG 0x2300 - -#define SQC_CACHES 0x2302 - -#define SQ_POWER_THROTTLE 0x2396 -#define MIN_POWER(x) ((x) << 0) -#define MIN_POWER_MASK (0x3fff << 0) -#define MIN_POWER_SHIFT 0 -#define MAX_POWER(x) ((x) << 16) -#define MAX_POWER_MASK (0x3fff << 16) -#define MAX_POWER_SHIFT 0 -#define SQ_POWER_THROTTLE2 0x2397 -#define MAX_POWER_DELTA(x) ((x) << 0) -#define MAX_POWER_DELTA_MASK (0x3fff << 0) -#define MAX_POWER_DELTA_SHIFT 0 -#define STI_SIZE(x) ((x) << 16) -#define STI_SIZE_MASK (0x3ff << 16) -#define STI_SIZE_SHIFT 16 -#define LTI_RATIO(x) ((x) << 27) -#define LTI_RATIO_MASK (0xf << 27) -#define LTI_RATIO_SHIFT 27 - -#define SX_DEBUG_1 0x2418 - -#define SPI_STATIC_THREAD_MGMT_1 0x2438 -#define SPI_STATIC_THREAD_MGMT_2 0x2439 -#define SPI_STATIC_THREAD_MGMT_3 0x243A -#define SPI_PS_MAX_WAVE_ID 0x243B - -#define SPI_CONFIG_CNTL 0x2440 - -#define SPI_CONFIG_CNTL_1 0x244F -#define VTX_DONE_DELAY(x) ((x) << 0) -#define INTERP_ONE_PRIM_PER_ROW (1 << 4) - -#define CGTS_TCC_DISABLE 0x2452 -#define CGTS_USER_TCC_DISABLE 0x2453 -#define TCC_DISABLE_MASK 0xFFFF0000 -#define TCC_DISABLE_SHIFT 16 -#define CGTS_SM_CTRL_REG 0x2454 -#define OVERRIDE (1 << 21) -#define LS_OVERRIDE (1 << 22) - -#define SPI_LB_CU_MASK 0x24D5 - #define TA_CNTL_AUX 0x2542 -#define CC_RB_BACKEND_DISABLE 0x263D -#define BACKEND_DISABLE(x) ((x) << 16) -#define GB_ADDR_CONFIG 0x263E -#define NUM_PIPES(x) ((x) << 0) -#define NUM_PIPES_MASK 0x00000007 -#define NUM_PIPES_SHIFT 0 -#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4) -#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070 -#define PIPE_INTERLEAVE_SIZE_SHIFT 4 -#define NUM_SHADER_ENGINES(x) ((x) << 12) -#define NUM_SHADER_ENGINES_MASK 0x00003000 -#define NUM_SHADER_ENGINES_SHIFT 12 -#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16) -#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000 -#define SHADER_ENGINE_TILE_SIZE_SHIFT 16 -#define NUM_GPUS(x) ((x) << 20) -#define NUM_GPUS_MASK 0x00700000 -#define NUM_GPUS_SHIFT 20 -#define MULTI_GPU_TILE_SIZE(x) ((x) << 24) -#define MULTI_GPU_TILE_SIZE_MASK 0x03000000 -#define MULTI_GPU_TILE_SIZE_SHIFT 24 -#define ROW_SIZE(x) ((x) << 28) -#define ROW_SIZE_MASK 0x30000000 -#define ROW_SIZE_SHIFT 28 - -#define CB_PERFCOUNTER0_SELECT0 0x2688 -#define CB_PERFCOUNTER0_SELECT1 0x2689 -#define CB_PERFCOUNTER1_SELECT0 0x268A -#define CB_PERFCOUNTER1_SELECT1 0x268B -#define CB_PERFCOUNTER2_SELECT0 0x268C -#define CB_PERFCOUNTER2_SELECT1 0x268D -#define CB_PERFCOUNTER3_SELECT0 0x268E -#define CB_PERFCOUNTER3_SELECT1 0x268F - -#define CB_CGTT_SCLK_CTRL 0x2698 - -#define TCP_CHAN_STEER_LO 0x2B03 -#define TCP_CHAN_STEER_HI 0x2B94 - -#define CP_RB0_BASE 0x3040 -#define CP_RB0_CNTL 0x3041 -#define RB_BUFSZ(x) ((x) << 0) -#define RB_BLKSZ(x) ((x) << 8) -#define BUF_SWAP_32BIT (2 << 16) -#define RB_NO_UPDATE (1 << 27) -#define RB_RPTR_WR_ENA (1 << 31) - -#define CP_RB0_RPTR_ADDR 0x3043 -#define CP_RB0_RPTR_ADDR_HI 0x3044 -#define CP_RB0_WPTR 0x3045 - -#define CP_PFP_UCODE_ADDR 0x3054 -#define CP_PFP_UCODE_DATA 0x3055 -#define CP_ME_RAM_RADDR 0x3056 -#define CP_ME_RAM_WADDR 0x3057 -#define CP_ME_RAM_DATA 0x3058 - -#define CP_CE_UCODE_ADDR 0x305A -#define CP_CE_UCODE_DATA 0x305B - -#define CP_RB1_BASE 0x3060 -#define CP_RB1_CNTL 0x3061 -#define CP_RB1_RPTR_ADDR 0x3062 -#define CP_RB1_RPTR_ADDR_HI 0x3063 -#define CP_RB1_WPTR 0x3064 -#define CP_RB2_BASE 0x3065 -#define CP_RB2_CNTL 0x3066 -#define CP_RB2_RPTR_ADDR 0x3067 -#define CP_RB2_RPTR_ADDR_HI 0x3068 -#define CP_RB2_WPTR 0x3069 -#define CP_INT_CNTL_RING0 0x306A -#define CP_INT_CNTL_RING1 0x306B -#define CP_INT_CNTL_RING2 0x306C -# define CNTX_BUSY_INT_ENABLE (1 << 19) -# define CNTX_EMPTY_INT_ENABLE (1 << 20) -# define WAIT_MEM_SEM_INT_ENABLE (1 << 21) -# define TIME_STAMP_INT_ENABLE (1 << 26) -# define CP_RINGID2_INT_ENABLE (1 << 29) -# define CP_RINGID1_INT_ENABLE (1 << 30) -# define CP_RINGID0_INT_ENABLE (1 << 31) -#define CP_INT_STATUS_RING0 0x306D -#define CP_INT_STATUS_RING1 0x306E -#define CP_INT_STATUS_RING2 0x306F -# define WAIT_MEM_SEM_INT_STAT (1 << 21) -# define TIME_STAMP_INT_STAT (1 << 26) -# define CP_RINGID2_INT_STAT (1 << 29) -# define CP_RINGID1_INT_STAT (1 << 30) -# define CP_RINGID0_INT_STAT (1 << 31) - // #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) @@ -1185,171 +311,14 @@ # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) -/* PIF PHY0 registers idx/data 0x8/0xc */ -#define PB0_PIF_CNTL 0x10 -# define LS2_EXIT_TIME(x) ((x) << 17) -# define LS2_EXIT_TIME_MASK (0x7 << 17) -# define LS2_EXIT_TIME_SHIFT 17 -#define PB0_PIF_PAIRING 0x11 -# define MULTI_PIF (1 << 25) -#define PB0_PIF_PWRDOWN_0 0x12 -# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10 -# define PLL_RAMP_UP_TIME_0(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_0_SHIFT 24 -#define PB0_PIF_PWRDOWN_1 0x13 -# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10 -# define PLL_RAMP_UP_TIME_1(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_1_SHIFT 24 - -#define PB0_PIF_PWRDOWN_2 0x17 -# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10 -# define PLL_RAMP_UP_TIME_2(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_2_SHIFT 24 -#define PB0_PIF_PWRDOWN_3 0x18 -# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7) -# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7) -# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7 -# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10) -# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10) -# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10 -# define PLL_RAMP_UP_TIME_3(x) ((x) << 24) -# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24) -# define PLL_RAMP_UP_TIME_3_SHIFT 24 -/* PIF PHY1 registers idx/data 0x10/0x14 */ -#define PB1_PIF_CNTL 0x10 -#define PB1_PIF_PAIRING 0x11 -#define PB1_PIF_PWRDOWN_0 0x12 -#define PB1_PIF_PWRDOWN_1 0x13 - -#define PB1_PIF_PWRDOWN_2 0x17 -#define PB1_PIF_PWRDOWN_3 0x18 -/* PCIE registers idx/data 0x30/0x34 */ -#define PCIE_CNTL2 0x1c /* PCIE */ -# define SLV_MEM_LS_EN (1 << 16) -# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17) -# define MST_MEM_LS_EN (1 << 18) -# define REPLAY_MEM_LS_EN (1 << 19) -#define PCIE_LC_STATUS1 0x28 /* PCIE */ -# define LC_REVERSE_RCVR (1 << 0) -# define LC_REVERSE_XMIT (1 << 1) -# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2) -# define LC_OPERATING_LINK_WIDTH_SHIFT 2 -# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5) -# define LC_DETECTED_LINK_WIDTH_SHIFT 5 - -#define PCIE_P_CNTL 0x40 /* PCIE */ -# define P_IGNORE_EDB_ERR (1 << 6) - /* PCIE PORT registers idx/data 0x38/0x3c */ -#define PCIE_LC_CNTL 0xa0 -# define LC_L0S_INACTIVITY(x) ((x) << 8) -# define LC_L0S_INACTIVITY_MASK (0xf << 8) -# define LC_L0S_INACTIVITY_SHIFT 8 -# define LC_L1_INACTIVITY(x) ((x) << 12) -# define LC_L1_INACTIVITY_MASK (0xf << 12) -# define LC_L1_INACTIVITY_SHIFT 12 -# define LC_PMI_TO_L1_DIS (1 << 16) -# define LC_ASPM_TO_L1_DIS (1 << 24) -#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ -# define LC_LINK_WIDTH_SHIFT 0 -# define LC_LINK_WIDTH_MASK 0x7 +// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ # define LC_LINK_WIDTH_X0 0 # define LC_LINK_WIDTH_X1 1 # define LC_LINK_WIDTH_X2 2 # define LC_LINK_WIDTH_X4 3 # define LC_LINK_WIDTH_X8 4 # define LC_LINK_WIDTH_X16 6 -# define LC_LINK_WIDTH_RD_SHIFT 4 -# define LC_LINK_WIDTH_RD_MASK 0x70 -# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7) -# define LC_RECONFIG_NOW (1 << 8) -# define LC_RENEGOTIATION_SUPPORT (1 << 9) -# define LC_RENEGOTIATE_EN (1 << 10) -# define LC_SHORT_RECONFIG_EN (1 << 11) -# define LC_UPCONFIGURE_SUPPORT (1 << 12) -# define LC_UPCONFIGURE_DIS (1 << 13) -# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21) -# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21) -# define LC_DYN_LANES_PWR_STATE_SHIFT 21 -#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */ -# define LC_XMIT_N_FTS(x) ((x) << 0) -# define LC_XMIT_N_FTS_MASK (0xff << 0) -# define LC_XMIT_N_FTS_SHIFT 0 -# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8) -# define LC_N_FTS_MASK (0xff << 24) -#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */ -# define LC_GEN2_EN_STRAP (1 << 0) -# define LC_GEN3_EN_STRAP (1 << 1) -# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2) -# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3) -# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3 -# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5) -# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6) -# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7) -# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8) -# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10) -# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10 -# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */ -# define LC_CURRENT_DATA_RATE_SHIFT 13 -# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16) -# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18) -# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19) -# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20) -# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21) - -#define PCIE_LC_CNTL2 0xb1 -# define LC_ALLOW_PDWN_IN_L1 (1 << 17) -# define LC_ALLOW_PDWN_IN_L23 (1 << 18) - -#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */ -# define LC_GO_TO_RECOVERY (1 << 30) -#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */ -# define LC_REDO_EQ (1 << 5) -# define LC_SET_QUIESCE (1 << 13) - -/* - * UVD - */ -#define UVD_UDEC_ADDR_CONFIG 0x3bd3 -#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4 -#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5 -#define UVD_RBC_RB_RPTR 0x3da4 -#define UVD_RBC_RB_WPTR 0x3da5 -#define UVD_STATUS 0x3daf - -#define UVD_CGC_CTRL 0x3dc2 -# define DCM (1 << 0) -# define CG_DT(x) ((x) << 2) -# define CG_DT_MASK (0xf << 2) -# define CLK_OD(x) ((x) << 6) -# define CLK_OD_MASK (0x1f << 6) - - /* UVD CTX indirect */ -#define UVD_CGC_MEM_CTRL 0xC0 -#define UVD_CGC_CTRL2 0xC1 -# define DYN_OR_EN (1 << 0) -# define DYN_RR_EN (1 << 1) -# define G_DIV_ID(x) ((x) << 2) -# define G_DIV_ID_MASK (0x7 << 2) /* * PM4 @@ -1583,45 +552,7 @@ /* ASYNC DMA - first instance at 0xd000, second at 0xd800 */ #define DMA0_REGISTER_OFFSET 0x0 /* not a register */ #define DMA1_REGISTER_OFFSET 0x200 /* not a register */ - -#define DMA_RB_CNTL 0x3400 -# define DMA_RB_ENABLE (1 << 0) -# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */ -# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12) -# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */ -# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */ -#define DMA_RB_BASE 0x3401 -#define DMA_RB_RPTR 0x3402 -#define DMA_RB_WPTR 0x3403 - -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 - -#define DMA_IB_CNTL 0x3409 -# define DMA_IB_ENABLE (1 << 0) -# define DMA_IB_SWAP_ENABLE (1 << 4) -# define CMD_VMID_FORCE (1 << 31) -#define DMA_IB_RPTR 0x340a -#define DMA_CNTL 0x340b -# define TRAP_ENABLE (1 << 0) -# define SEM_INCOMPLETE_INT_ENABLE (1 << 1) -# define SEM_WAIT_INT_ENABLE (1 << 2) -# define DATA_SWAP_ENABLE (1 << 3) -# define FENCE_SWAP_ENABLE (1 << 4) -# define CTXEMPTY_INT_ENABLE (1 << 28) -#define DMA_STATUS_REG 0x340d -# define DMA_IDLE (1 << 0) -#define DMA_TILING_CONFIG 0x342e - -#define DMA_POWER_CNTL 0x342f -# define MEM_POWER_OVERRIDE (1 << 8) -#define DMA_CLK_CTRL 0x3430 - -#define DMA_PG 0x3435 -# define PG_CNTL_ENABLE (1 << 0) -#define DMA_PGFSM_CONFIG 0x3436 -#define DMA_PGFSM_WRITE 0x3437 +#define SDMA_MAX_INSTANCE 2 #define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \ (((b) & 0x1) << 26) | \ @@ -1650,6 +581,7 @@ #define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf +/* VCE */ #define VCE_STATUS 0x20004 #define VCE_VCPU_CNTL 0x20014 #define VCE_CLK_EN (1 << 0) @@ -1726,378 +658,118 @@ #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define AMDGPU_MM_INDEX 0x0000 -#define AMDGPU_MM_DATA 0x0001 -#define VERDE_NUM_CRTC 6 -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -#define EVERGREEN_CRTC_MASTER_EN (1 << 0) -#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define EVERGREEN_DATA_FORMAT 0x1ac0 -# define EVERGREEN_INTERLEAVE_EN (1 << 0) - -#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) -#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) -#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) -#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20) - -#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45 -#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845 - -#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 -#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 - -#define R600_D1GRPH_SWAP_CONTROL 0x1843 -#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0) -#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0) - -#define AVIVO_D1VGA_CONTROL 0x00cc -# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0) -# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8) -# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9) -# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10) -# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16) -# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24) -#define AVIVO_D2VGA_CONTROL 0x00ce - -#define R600_BUS_CNTL 0x1508 -# define R600_BIOS_ROM_DIS (1 << 1) #define R600_ROM_CNTL 0x580 # define R600_SCK_OVERWRITE (1 << 1) # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define FMT_BIT_DEPTH_CONTROL 0x1bf2 -#define FMT_TRUNCATE_EN (1 << 0) -#define FMT_TRUNCATE_DEPTH (1 << 4) -#define FMT_SPATIAL_DITHER_EN (1 << 8) -#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9) -#define FMT_SPATIAL_DITHER_DEPTH (1 << 12) -#define FMT_FRAME_RANDOM_ENABLE (1 << 13) -#define FMT_RGB_RANDOM_ENABLE (1 << 14) -#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15) -#define FMT_TEMPORAL_DITHER_EN (1 << 16) -#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20) -#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21) -#define FMT_TEMPORAL_LEVEL (1 << 24) -#define FMT_TEMPORAL_DITHER_RESET (1 << 25) -#define FMT_25FRC_SEL(x) ((x) << 26) -#define FMT_50FRC_SEL(x) ((x) << 28) -#define FMT_75FRC_SEL(x) ((x) << 30) +#define GRPH_ARRAY_LINEAR_GENERAL 0 +#define GRPH_ARRAY_LINEAR_ALIGNED 1 +#define GRPH_ARRAY_1D_TILED_THIN1 2 +#define GRPH_ARRAY_2D_TILED_THIN1 4 -#define EVERGREEN_DC_LUT_CONTROL 0x1a80 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82 -#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85 -#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86 -#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c -#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79 -#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e -#define EVERGREEN_DC_LUT_RW_MODE 0x1a78 +#define ES_AND_GS_AUTO 3 +#define BUF_SWAP_32BIT (2 << 16) -#define EVERGREEN_GRPH_ENABLE 0x1a00 -#define EVERGREEN_GRPH_CONTROL 0x1a01 -#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0) -#define EVERGREEN_GRPH_DEPTH_8BPP 0 -#define EVERGREEN_GRPH_DEPTH_16BPP 1 -#define EVERGREEN_GRPH_DEPTH_32BPP 2 -#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2) -#define EVERGREEN_ADDR_SURF_2_BANK 0 -#define EVERGREEN_ADDR_SURF_4_BANK 1 -#define EVERGREEN_ADDR_SURF_8_BANK 2 -#define EVERGREEN_ADDR_SURF_16_BANK 3 -#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4) -#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6) -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2 -#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3 -#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8) +#define GRPH_DEPTH_8BPP 0 +#define GRPH_DEPTH_16BPP 1 +#define GRPH_DEPTH_32BPP 2 -#define EVERGREEN_GRPH_FORMAT_INDEXED 0 -#define EVERGREEN_GRPH_FORMAT_ARGB1555 0 -#define EVERGREEN_GRPH_FORMAT_ARGB565 1 -#define EVERGREEN_GRPH_FORMAT_ARGB4444 2 -#define EVERGREEN_GRPH_FORMAT_AI88 3 -#define EVERGREEN_GRPH_FORMAT_MONO16 4 -#define EVERGREEN_GRPH_FORMAT_BGRA5551 5 +/* 8 BPP */ +#define GRPH_FORMAT_INDEXED 0 + +/* 16 BPP */ +#define GRPH_FORMAT_ARGB1555 0 +#define GRPH_FORMAT_ARGB565 1 +#define GRPH_FORMAT_ARGB4444 2 +#define GRPH_FORMAT_AI88 3 +#define GRPH_FORMAT_MONO16 4 +#define GRPH_FORMAT_BGRA5551 5 /* 32 BPP */ -#define EVERGREEN_GRPH_FORMAT_ARGB8888 0 -#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1 -#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2 -#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3 -#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4 -#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5 -#define EVERGREEN_GRPH_FORMAT_RGB111110 6 -#define EVERGREEN_GRPH_FORMAT_BGR101111 7 -#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11) -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2 -#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3 -#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13) -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5 -#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6 -#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18) -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 -#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20) -#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0 -#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1 -#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2 -#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2 -#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3 +#define GRPH_FORMAT_ARGB8888 0 +#define GRPH_FORMAT_ARGB2101010 1 +#define GRPH_FORMAT_32BPP_DIG 2 +#define GRPH_FORMAT_8B_ARGB2101010 3 +#define GRPH_FORMAT_BGRA1010102 4 +#define GRPH_FORMAT_8B_BGRA1010102 5 +#define GRPH_FORMAT_RGB111110 6 +#define GRPH_FORMAT_BGR101111 7 -#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03 -#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) -# define EVERGREEN_GRPH_ENDIAN_NONE 0 -# define EVERGREEN_GRPH_ENDIAN_8IN16 1 -# define EVERGREEN_GRPH_ENDIAN_8IN32 2 -# define EVERGREEN_GRPH_ENDIAN_8IN64 3 -#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) -# define EVERGREEN_GRPH_RED_SEL_R 0 -# define EVERGREEN_GRPH_RED_SEL_G 1 -# define EVERGREEN_GRPH_RED_SEL_B 2 -# define EVERGREEN_GRPH_RED_SEL_A 3 -#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) -# define EVERGREEN_GRPH_GREEN_SEL_G 0 -# define EVERGREEN_GRPH_GREEN_SEL_B 1 -# define EVERGREEN_GRPH_GREEN_SEL_A 2 -# define EVERGREEN_GRPH_GREEN_SEL_R 3 -#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) -# define EVERGREEN_GRPH_BLUE_SEL_B 0 -# define EVERGREEN_GRPH_BLUE_SEL_A 1 -# define EVERGREEN_GRPH_BLUE_SEL_R 2 -# define EVERGREEN_GRPH_BLUE_SEL_G 3 -#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) -# define EVERGREEN_GRPH_ALPHA_SEL_A 0 -# define EVERGREEN_GRPH_ALPHA_SEL_R 1 -# define EVERGREEN_GRPH_ALPHA_SEL_G 2 -# define EVERGREEN_GRPH_ALPHA_SEL_B 3 +#define GRPH_ENDIAN_NONE 0 +#define GRPH_ENDIAN_8IN16 1 +#define GRPH_ENDIAN_8IN32 2 +#define GRPH_ENDIAN_8IN64 3 +#define GRPH_RED_SEL_R 0 +#define GRPH_RED_SEL_G 1 +#define GRPH_RED_SEL_B 2 +#define GRPH_RED_SEL_A 3 -#define EVERGREEN_D3VGA_CONTROL 0xf8 -#define EVERGREEN_D4VGA_CONTROL 0xf9 -#define EVERGREEN_D5VGA_CONTROL 0xfa -#define EVERGREEN_D6VGA_CONTROL 0xfb +#define GRPH_GREEN_SEL_G 0 +#define GRPH_GREEN_SEL_B 1 +#define GRPH_GREEN_SEL_A 2 +#define GRPH_GREEN_SEL_R 3 -#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00 +#define GRPH_BLUE_SEL_B 0 +#define GRPH_BLUE_SEL_A 1 +#define GRPH_BLUE_SEL_R 2 +#define GRPH_BLUE_SEL_G 3 -#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02 -#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8) +#define GRPH_ALPHA_SEL_A 0 +#define GRPH_ALPHA_SEL_R 1 +#define GRPH_ALPHA_SEL_G 2 +#define GRPH_ALPHA_SEL_B 3 -#define EVERGREEN_GRPH_PITCH 0x1a06 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09 -#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a -#define EVERGREEN_GRPH_X_START 0x1a0b -#define EVERGREEN_GRPH_Y_START 0x1a0c -#define EVERGREEN_GRPH_X_END 0x1a0d -#define EVERGREEN_GRPH_Y_END 0x1a0e -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12 -#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0) +/* CUR_CONTROL */ + #define CURSOR_MONO 0 + #define CURSOR_24_1 1 + #define CURSOR_24_8_PRE_MULT 2 + #define CURSOR_24_8_UNPRE_MULT 3 + #define CURSOR_URGENT_ALWAYS 0 + #define CURSOR_URGENT_1_8 1 + #define CURSOR_URGENT_1_4 2 + #define CURSOR_URGENT_3_8 3 + #define CURSOR_URGENT_1_2 4 -#define EVERGREEN_VIEWPORT_START 0x1b5c -#define EVERGREEN_VIEWPORT_SIZE 0x1b5d -#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1 +/* INPUT_CSC_CONTROL */ +# define INPUT_CSC_BYPASS 0 +# define INPUT_CSC_PROG_COEFF 1 +# define INPUT_CSC_PROG_SHARED_MATRIXA 2 -/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ -#define EVERGREEN_CUR_CONTROL 0x1a66 -# define EVERGREEN_CURSOR_EN (1 << 0) -# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8) -# define EVERGREEN_CURSOR_MONO 0 -# define EVERGREEN_CURSOR_24_1 1 -# define EVERGREEN_CURSOR_24_8_PRE_MULT 2 -# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3 -# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16) -# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20) -# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) -# define EVERGREEN_CURSOR_URGENT_ALWAYS 0 -# define EVERGREEN_CURSOR_URGENT_1_8 1 -# define EVERGREEN_CURSOR_URGENT_1_4 2 -# define EVERGREEN_CURSOR_URGENT_3_8 3 -# define EVERGREEN_CURSOR_URGENT_1_2 4 -#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67 -# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000 -#define EVERGREEN_CUR_SIZE 0x1a68 -#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69 -#define EVERGREEN_CUR_POSITION 0x1a6a -#define EVERGREEN_CUR_HOT_SPOT 0x1a6b -#define EVERGREEN_CUR_COLOR1 0x1a6c -#define EVERGREEN_CUR_COLOR2 0x1a6d -#define EVERGREEN_CUR_UPDATE 0x1a6e -# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0) -# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1) -# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) -# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) +/* OUTPUT_CSC_CONTROL */ +# define OUTPUT_CSC_BYPASS 0 +# define OUTPUT_CSC_TV_RGB 1 +# define OUTPUT_CSC_YCBCR_601 2 +# define OUTPUT_CSC_YCBCR_709 3 +# define OUTPUT_CSC_PROG_COEFF 4 +# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5 + +/* DEGAMMA_CONTROL */ +# define DEGAMMA_BYPASS 0 +# define DEGAMMA_SRGB_24 1 +# define DEGAMMA_XVYCC_222 2 + +/* GAMUT_REMAP_CONTROL */ +# define GAMUT_REMAP_BYPASS 0 +# define GAMUT_REMAP_PROG_COEFF 1 +# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2 +# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3 + +/* REGAMMA_CONTROL */ +# define REGAMMA_BYPASS 0 +# define REGAMMA_SRGB_24 1 +# define REGAMMA_XVYCC_222 2 +# define REGAMMA_PROG_A 3 +# define REGAMMA_PROG_B 4 -#define NI_INPUT_CSC_CONTROL 0x1a35 -# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_CSC_BYPASS 0 -# define NI_INPUT_CSC_PROG_COEFF 1 -# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2 -# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4) - -#define NI_OUTPUT_CSC_CONTROL 0x1a3c -# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0) -# define NI_OUTPUT_CSC_BYPASS 0 -# define NI_OUTPUT_CSC_TV_RGB 1 -# define NI_OUTPUT_CSC_YCBCR_601 2 -# define NI_OUTPUT_CSC_YCBCR_709 3 -# define NI_OUTPUT_CSC_PROG_COEFF 4 -# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5 -# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4) - -#define NI_DEGAMMA_CONTROL 0x1a58 -# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_DEGAMMA_BYPASS 0 -# define NI_DEGAMMA_SRGB_24 1 -# define NI_DEGAMMA_XVYCC_222 2 -# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4) -# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8) -# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12) - -#define NI_GAMUT_REMAP_CONTROL 0x1a59 -# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0) -# define NI_GAMUT_REMAP_BYPASS 0 -# define NI_GAMUT_REMAP_PROG_COEFF 1 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2 -# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3 -# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4) - -#define NI_REGAMMA_CONTROL 0x1aa0 -# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0) -# define NI_REGAMMA_BYPASS 0 -# define NI_REGAMMA_SRGB_24 1 -# define NI_REGAMMA_XVYCC_222 2 -# define NI_REGAMMA_PROG_A 3 -# define NI_REGAMMA_PROG_B 4 -# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4) - - -#define NI_PRESCALE_GRPH_CONTROL 0x1a2d -# define NI_GRPH_PRESCALE_BYPASS (1 << 4) - -#define NI_PRESCALE_OVL_CONTROL 0x1a31 -# define NI_OVL_PRESCALE_BYPASS (1 << 4) - -#define NI_INPUT_GAMMA_CONTROL 0x1a10 -# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0) -# define NI_INPUT_GAMMA_USE_LUT 0 -# define NI_INPUT_GAMMA_BYPASS 1 -# define NI_INPUT_GAMMA_SRGB_24 2 -# define NI_INPUT_GAMMA_XVYCC_222 3 -# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4) - -#define BLACKOUT_MODE_MASK 0x00000007 -#define VGA_RENDER_CONTROL 0xC0 -#define R_000300_VGA_RENDER_CONTROL 0xC0 -#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF -#define EVERGREEN_CRTC_STATUS 0x1BA3 -#define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4 -/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ -#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d -#define EVERGREEN_CRTC_CONTROL 0x1b9c -# define EVERGREEN_CRTC_MASTER_EN (1 << 0) -# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24) -#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d -# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8) -# define EVERGREEN_CRTC_V_BLANK (1 << 0) -#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8 -#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5 -#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd -#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe -#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16) -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08 -#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04 -#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05 -#define EVERGREEN_GRPH_UPDATE 0x1a11 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4 -#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9 -#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2) - -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10 -#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80 -#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400 -#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000 -#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000 -#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000 -#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13 - -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000 -#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18 - -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7 -#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0 - -#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1 -#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0 -#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2 -#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1 - -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 -#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb +/* INPUT_GAMMA_CONTROL */ +# define INPUT_GAMMA_USE_LUT 0 +# define INPUT_GAMMA_BYPASS 1 +# define INPUT_GAMMA_SRGB_24 2 +# define INPUT_GAMMA_XVYCC_222 3 #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 @@ -2113,20 +785,9 @@ #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) -#define CONFIG_CNTL 0x1509 -#define CC_DRM_ID_STRAPS 0X1559 #define AMDGPU_PCIE_INDEX 0xc #define AMDGPU_PCIE_DATA 0xd -#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 -#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 -#define DMA_MODE 0x342f -#define DMA_RB_RPTR_ADDR_HI 0x3407 -#define DMA_RB_RPTR_ADDR_LO 0x3408 -#define DMA_BUSY_MASK 0x20 -#define DMA1_BUSY_MASK 0X40 -#define SDMA_MAX_INSTANCE 2 - #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) #define PCIE_PORT_INDEX 0xe @@ -2136,8 +797,6 @@ #define EVERGREEN_PIF_PHY1_INDEX 0x10 #define EVERGREEN_PIF_PHY1_DATA 0x14 -#define MC_VM_FB_OFFSET 0x81a - /* Discrete VCE clocks */ #define CG_VCEPLL_FUNC_CNTL 0xc0030600 #define VCEPLL_RESET_MASK 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 659eab9b90be..c457be3a3c56 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,6 +584,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * Enable triggering of GPU reset only if specified * by module parameter. */ + if (adev->pcie_reset_ctx.in_link_reset) + return AMD_RESET_METHOD_LINK; if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) return AMD_RESET_METHOD_MODE2; else if (!(adev->flags & AMD_IS_APU)) @@ -640,6 +642,9 @@ asic_reset: case AMD_RESET_METHOD_MODE2: dev_info(adev->dev, "MODE2 reset\n"); return amdgpu_dpm_mode2_reset(adev); + case AMD_RESET_METHOD_LINK: + dev_info(adev->dev, "Link reset\n"); + return amdgpu_device_link_reset(adev); default: dev_info(adev->dev, "MODE1 reset\n"); return amdgpu_device_mode1_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index a5000c171c02..cf93fa477674 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -552,6 +552,11 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7 +/* 1. header + * 2. RESERVED [31:0] + */ + #define PACKET3_RUN_CLEANER_SHADER 0xD2 /* 1. header * 2. RESERVED [31:0] diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 0e404c074975..e590cbdd8de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -174,19 +174,76 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } +static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + uint32_t vram_type = adev->gmc.vram_type; + struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits); + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + /* default setting */ + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT; + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT; + flip_bits->flip_row_bit = 13; + flip_bits->bit_num = 4; + flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT; + } else if (nps == AMDGPU_NPS4_PARTITION_MODE) { + flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT; + flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT; + flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT; + flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT; + } + + switch (vram_type) { + case AMDGPU_VRAM_TYPE_HBM: + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + + break; + case AMDGPU_VRAM_TYPE_HBM3E: + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT; + flip_bits->flip_row_bit = 12; + + if (nps == AMDGPU_NPS2_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT; + else if (nps == AMDGPU_NPS4_PARTITION_MODE) + flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT; + + break; + default: + dev_warn(adev->dev, + "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n"); + break; + } + + adev->umc.retire_unit = 0x1 << flip_bits->bit_num; +} + static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out, bool dump_addr) { - uint32_t col, col_lower, row, row_lower, bank; + uint32_t col, col_lower, row, row_lower, row_high, bank; uint32_t channel_index = 0, umc_inst = 0; - uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint32_t i, bit_num, retire_unit, *flip_bits; uint64_t soc_pa, column, err_addr; struct ta_ras_query_address_output addr_out_tmp; struct ta_ras_query_address_output *paddr_out; - enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; if (!addr_out) @@ -211,46 +268,46 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, umc_inst = addr_in->ma.umc_inst; } - loop_bits[0] = UMC_V12_0_PA_C2_BIT; - loop_bits[1] = UMC_V12_0_PA_C3_BIT; - loop_bits[2] = UMC_V12_0_PA_C4_BIT; - loop_bits[3] = UMC_V12_0_PA_R13_BIT; - - if (adev->gmc.gmc_funcs->query_mem_partition_mode) - nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - - /* other nps modes are taken as nps1 */ - if (nps == AMDGPU_NPS4_PARTITION_MODE) { - loop_bits[0] = UMC_V12_0_PA_CH4_BIT; - loop_bits[1] = UMC_V12_0_PA_CH5_BIT; - loop_bits[2] = UMC_V12_0_PA_B0_BIT; - loop_bits[3] = UMC_V12_0_PA_R11_BIT; - } + flip_bits = adev->umc.flip_bits.flip_bits_in_pa; + bit_num = adev->umc.flip_bits.bit_num; + retire_unit = adev->umc.retire_unit; soc_pa = paddr_out->pa.pa; channel_index = paddr_out->pa.channel_idx; /* clear loop bits in soc physical address */ - for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) - soc_pa &= ~BIT_ULL(loop_bits[i]); + for (i = 0; i < bit_num; i++) + soc_pa &= ~BIT_ULL(flip_bits[i]); paddr_out->pa.pa = soc_pa; /* get column bit 0 and 1 in mca address */ col_lower = (err_addr >> 1) & 0x3ULL; - /* MA_R13_BIT will be handled later */ + /* extra row bit will be handled later */ row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit); + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) { + row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL; + /* it's 2.25GB in each channel, from MCA address to PA + * [R14 R13] is converted if the two bits value are 0x3, + * get them from PA instead of MCA address. + */ + row_lower |= (row_high << 13); + } if (!err_data && !dump_addr) goto out; /* loop for all possibilities of retired bits */ - for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { + for (column = 0; column < retire_unit; column++) { soc_pa = paddr_out->pa.pa; - for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) - soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); + for (i = 0; i < bit_num; i++) + soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]); col = ((column & 0x7) << 2) | col_lower; - /* add row bit 13 */ - row = ((column >> 3) << 13) | row_lower; + /* handle extra row bit */ + if (bit_num == RETIRE_FLIP_BITS_NUM) + row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) | + row_lower; if (dump_addr) dev_info(adev->dev, @@ -428,8 +485,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank bank->regs[ACA_REG_IDX_ADDR]); ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); - count = ext_error_code == 0 ? - ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + if (umc_v12_0_is_deferred_error(adev, status)) + count = ext_error_code == 0 ? + adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL; + else + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; return aca_error_cache_log_bank_error(handle, &info, err_type, count); } @@ -469,8 +530,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; struct ta_ras_query_address_output addr_out; - enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; - uint32_t shift_bit = UMC_V12_0_PA_C4_BIT; + uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2]; int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); @@ -515,11 +575,6 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT; ecc_err->channel_idx = addr_out.pa.channel_idx; - if (adev->gmc.gmc_funcs->query_mem_partition_mode) - nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - if (nps == AMDGPU_NPS4_PARTITION_MODE) - shift_bit = UMC_V12_0_PA_B0_BIT; - /* If converted pa_pfn is 0, use pa C4 pfn. */ if (!ecc_err->pa_pfn) ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT; @@ -665,5 +720,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .update_ecc_status = umc_v12_0_update_ecc_status, .convert_ras_err_addr = umc_v12_0_convert_error_address, .get_die_id_from_pa = umc_v12_0_get_die_id, + .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 9298018d938f..63b7e7254526 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,8 +55,6 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */ -#define UMC_V12_0_RETIRE_LOOP_BITS 4 /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 @@ -64,13 +62,16 @@ #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R0_BIT 22 +#define UMC_V12_0_PA_R10_BIT 32 #define UMC_V12_0_PA_R11_BIT 33 +#define UMC_V12_0_PA_R12_BIT 34 #define UMC_V12_0_PA_R13_BIT 35 /* channel bit in SOC physical address */ #define UMC_V12_0_PA_CH4_BIT 12 #define UMC_V12_0_PA_CH5_BIT 13 /* bank bit in SOC physical address */ #define UMC_V12_0_PA_B0_BIT 19 +#define UMC_V12_0_PA_B1_BIT 20 /* row bits in MCA address */ #define UMC_V12_0_MA_R0_BIT 10 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 1f777c125b00..8fff470bce87 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -239,9 +239,9 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); @@ -1947,6 +1947,20 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; } +static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v4_0_stop(vinst); + vcn_v4_0_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1976,6 +1990,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 012f6ea928ec..712e1fba33ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -288,6 +288,31 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst) +{ + int vcn_inst; + struct amdgpu_device *adev = vinst->adev; + struct amdgpu_ring *ring; + int inst_idx = vinst->inst; + + vcn_inst = GET_INST(VCN, inst_idx); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst, + adev->vcn.inst[inst_idx].aid_id); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + } + + return 0; +} + /** * vcn_v4_0_3_hw_init - start and test VCN block * @@ -299,7 +324,8 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + struct amdgpu_vcn_inst *vinst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v4_0_3_start_sriov(adev); @@ -322,28 +348,9 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); - } + vinst = &adev->vcn.inst[i]; + vcn_v4_0_3_hw_init_inst(vinst); /* Re-init fw_shared when RAS fatal error occurred */ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; @@ -1564,6 +1571,37 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (amdgpu_sriov_vf(ring->adev)) + return -EOPNOTSUPP; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + vcn_v4_0_3_hw_init_inst(vinst); + vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); + r = amdgpu_ring_test_helper(ring); + + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1592,6 +1630,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_3_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index f11df9c2ec13..a09f9a2dd471 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -208,6 +208,10 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); @@ -215,6 +219,13 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; } + adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -1444,6 +1455,20 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v4_0_5_stop(vinst); + vcn_v4_0_5_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1471,6 +1496,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v4_0_5_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index b90da3d3e140..27dcc6f37a73 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -196,9 +196,9 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; vcn_v5_0_0_alloc_ip_dump(adev); @@ -1172,6 +1172,20 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + vcn_v5_0_0_stop(vinst); + vcn_v5_0_0_start(vinst); + + return amdgpu_ring_test_helper(ring); +} + static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1199,6 +1213,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_0_ring_reset, }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index e0e84ef7f568..8e843011703c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -30,6 +30,7 @@ #include "soc15_hw_ip.h" #include "vcn_v2_0.h" #include "vcn_v4_0_3.h" +#include "mmsch_v5_0.h" #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" @@ -39,6 +40,7 @@ #include +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, @@ -126,7 +128,14 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); @@ -143,6 +152,12 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + vcn_v5_0_0_alloc_ip_dump(adev); return amdgpu_vcn_sysfs_reset_mask_init(adev); @@ -172,6 +187,9 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -204,24 +222,38 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; int i, r, vcn_inst; - if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) - adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst), - adev->vcn.inst[i].aid_id); - - /* Re-init fw_shared, if required */ - vcn_v5_0_1_fw_shared_init(adev, i); - - r = amdgpu_ring_test_helper(ring); + if (amdgpu_sriov_vf(adev)) { + r = vcn_v5_0_1_start_sriov(adev); if (r) return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v5_0_1_unified_ring_set_wptr(ring); + ring->sched.ready = true; + } + } else { + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); + + /* Re-init fw_shared, if required */ + vcn_v5_0_1_fw_shared_init(adev, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } return 0; @@ -663,6 +695,195 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, return 0; } +static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev) +{ + int i, vcn_inst; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v5_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v5_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v5_0_cmd_end end = { {0} }; + struct mmsch_v5_0_init_header header; + + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + vcn_v5_0_1_fw_shared_init(adev, vcn_inst); + + memset(&header, 0, sizeof(struct mmsch_v5_0_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + table_size = 0; + + MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + + offset = 0; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), 0); + } else { + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset; + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET1), 0); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET2), 0); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + MMSCH_V5_0_INSERT_END(); + + header.vcn0.init_status = 0; + header.vcn0.table_offset = header.total_size; + header.vcn0.table_size = table_size; + header.total_size += table_size; + + /* Send init table to mmsch */ + size = sizeof(struct mmsch_v5_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) { + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + } + } + + return 0; +} + /** * vcn_v5_0_1_start - VCN start * @@ -1103,8 +1324,18 @@ static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state) { + struct amdgpu_device *adev = vinst->adev; int ret = 0; + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + vinst->cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == vinst->cur_state) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index faa0dd75dd6d..85846fd08ce4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -350,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) if (ret) return ret; } + ih[i]->overflow = false; } if (!amdgpu_sriov_vf(adev)) @@ -437,7 +438,10 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + if (!amdgpu_sriov_vf(adev)) + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + else + ih->overflow = true; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 32). Hopefully diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 4a5a0a4e00f2..9bde2c64540f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -27,6 +27,16 @@ #include "kfd_priv.h" static struct dentry *debugfs_root; +static struct dentry *debugfs_proc; +static struct list_head procs; + +struct debugfs_proc_entry { + struct list_head list; + struct dentry *proc_dentry; + pid_t pid; +}; + +#define MAX_DEBUGFS_FILENAME_LEN 32 static int kfd_debugfs_open(struct inode *inode, struct file *file) { @@ -92,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = { void kfd_debugfs_init(void) { debugfs_root = debugfs_create_dir("kfd", NULL); + debugfs_proc = debugfs_create_dir("proc", debugfs_root); + INIT_LIST_HEAD(&procs); debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, kfd_debugfs_mqds_by_process, &kfd_debugfs_fops); @@ -107,5 +119,69 @@ void kfd_debugfs_init(void) void kfd_debugfs_fini(void) { + debugfs_remove_recursive(debugfs_proc); debugfs_remove_recursive(debugfs_root); } + +static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kfd_process_device *pdd = file_inode(file)->i_private; + char tmp[32]; + int len; + + len = snprintf(tmp, sizeof(tmp), "%u\n", pdd->pasid); + + return simple_read_from_buffer(buf, count, ppos, tmp, len); +} + +static const struct file_operations kfd_debugfs_pasid_fops = { + .owner = THIS_MODULE, + .read = kfd_debugfs_pasid_read, +}; + +void kfd_debugfs_add_process(struct kfd_process *p) +{ + int i; + char name[MAX_DEBUGFS_FILENAME_LEN]; + struct debugfs_proc_entry *entry; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return; + + list_add(&entry->list, &procs); + entry->pid = p->lead_thread->pid; + snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d", + (int)entry->pid); + entry->proc_dentry = debugfs_create_dir(name, debugfs_proc); + + /* Create debugfs files for each GPU: + * - proc//pasid_ + */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u", + pdd->dev->id); + debugfs_create_file((const char *)name, S_IFREG | 0444, + entry->proc_dentry, pdd, + &kfd_debugfs_pasid_fops); + } +} + +void kfd_debugfs_remove_process(struct kfd_process *p) +{ + struct debugfs_proc_entry *entry, *next; + + mutex_lock(&kfd_processes_mutex); + list_for_each_entry_safe(entry, next, &procs, list) { + if (entry->pid != p->lead_thread->pid) + continue; + + debugfs_remove_recursive(entry->proc_dentry); + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&kfd_processes_mutex); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index b9c82be6ce13..bf0854bd5555 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -352,11 +352,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) f2g = &aldebaran_kfd2kgd; break; case IP_VERSION(9, 4, 3): - gfx_target_version = adev->rev_id >= 1 ? 90402 - : adev->flags & AMD_IS_APU ? 90400 - : 90401; - f2g = &gc_9_4_3_kfd2kgd; - break; case IP_VERSION(9, 4, 4): gfx_target_version = 90402; f2g = &gc_9_4_3_kfd2kgd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c610e172a2b8..76359c6a3f3a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1576,8 +1576,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, int bit; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { - dev_err(dev, "No more SDMA queue to allocate\n"); + if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { + dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", + get_num_sdma_queues(dqm)); return -ENOMEM; } @@ -1602,8 +1603,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_sdma_engines(dqm->dev); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { - dev_err(dev, "No more XGMI SDMA queue to allocate\n"); + if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { + dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", + get_num_xgmi_sdma_queues(dqm)); return -ENOMEM; } if (restore_sdma_id) { @@ -1662,8 +1664,8 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } if (!free_bit_found) { - dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", - q->properties.sdma_engine_id); + dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", + q->properties.sdma_engine_id, num_queues); return -ENOMEM; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index fecdb6794075..e54e708ed82d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid) kfd_unref_process(p); } +void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va) +{ + struct kfd_process_device *pdd; + struct kfd_hsa_memory_exception_data exception_data; + int i; + + memset(&exception_data, 0, sizeof(exception_data)); + exception_data.va = gpu_va; + exception_data.failure.NotPresent = 1; + + // Send VM seg fault to all kfd process device + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; + exception_data.gpu_id = pdd->user_gpu_id; + kfd_evict_process_device(pdd); + kfd_signal_vm_fault_event(pdd, NULL, &exception_data); + } +} + void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, struct kfd_vm_fault_info *info, struct kfd_hsa_memory_exception_data *data) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 37b69fe0ede3..3e1ad8974797 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -168,14 +168,14 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev, client_id != SOC15_IH_CLIENTID_SE3SH) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); - /* If there is no valid PASID, it's likely a bug */ - if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) + if (pasid == 0) return 0; /* Interrupt types we care about: various signals and faults. @@ -217,37 +217,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF0_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_BUF1_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, - THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_AUTO_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF0_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF1_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID)); + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, KFD_DEBUG_DOORBELL_ID(context_id0), @@ -259,21 +288,37 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0, ERR_TYPE); - pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SA_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, - SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, - WGP_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SA_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID0, + SIMD_ID), + REG_GET_FIELD( + context_id1, + SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID), sq_intr_err_type); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index c5f97e6e36ff..2788a52714d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -148,44 +148,69 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \ KFD_CTXID0_DOORBELL_ID_MASK) -static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + REG_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + CMD_TIMESTAMP), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_CMD_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + HOST_REG_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + IMMED_OVERFLOW), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, + THREAD_TRACE_UTC_ERROR)); } -static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + SH_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, + WGP_ID)); } -static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) +static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0, + uint32_t context_id1) { - pr_warn_ratelimited( + dev_warn_ratelimited( + dev->adev->dev, "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID)); + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + DETAIL), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + TYPE), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + SH_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + PRIV), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, + WAVE_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + SIMD_ID), + REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, + WGP_ID)); } static void event_interrupt_poison_consumption_v11(struct kfd_node *dev, @@ -255,14 +280,14 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev, (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG)) return false; - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); - /* If there is no valid PASID, it's likely a bug */ - if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) + if (pasid == 0) return false; /* Interrupt types we care about: various signals and faults. @@ -353,10 +378,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (sq_int_enc) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - print_sq_intr_info_auto(context_id0, context_id1); + print_sq_intr_info_auto(dev, context_id0, context_id1); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - print_sq_intr_info_inst(context_id0, context_id1); + print_sq_intr_info_inst(dev, context_id0, context_id1); sq_int_priv = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV); if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -366,7 +391,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, return; break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: - print_sq_intr_info_error(context_id0, context_id1); + print_sq_intr_info_error(dev, context_id0, context_id1); sq_int_errtype = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE); if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index b8a91bf4ef30..4ceb251312a6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -314,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, & ~pasid_mask) | pasid); } - pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", - client_id, source_id, vmid, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); + dev_dbg(dev->adev->dev, + "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", + client_id, source_id, vmid, pasid); + dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], data[4], data[5], data[6], + data[7]); /* If there is no valid PASID, it's likely a bug */ if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) @@ -379,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug_ratelimited( + dev_dbg_ratelimited( + dev->adev->dev, "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + WLT), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_BUF_FULL), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + REG_TIMESTAMP), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + CMD_TIMESTAMP), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_CMD_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + HOST_REG_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + IMMED_OVERFLOW), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_AUTO_CTXID, + THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_dbg_ratelimited( + dev->adev->dev, + "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_int_data); if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) { if (kfd_set_dbg_ev_from_interrupt(dev, pasid, @@ -412,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); - pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), - REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), + dev_warn_ratelimited( + dev->adev->dev, + "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + DATA), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SH_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + PRIV), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + WAVE_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + SIMD_ID), + REG_GET_FIELD( + context_id0, + SQ_INTERRUPT_WORD_WAVE_CTXID, + CU_ID), sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 80320a6c8854..97933d2a3803 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -495,6 +495,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; + /* Allow context switch so we don't cross-process starve with a massive + * command buffer of long-running SDMA commands + */ + m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK; q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 271c567242ab..b1a6eb349bb3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -31,6 +31,7 @@ #define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0) #define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1) #define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2) +#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3) static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -44,7 +45,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, - int *over_subscription) + int *over_subscription, + int xnack_conflict) { unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; @@ -73,6 +75,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; if (gws_queue_count > 1) *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; + if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)) + *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT; if (*over_subscription) dev_dbg(dev, "Over subscribed runlist\n"); @@ -96,7 +100,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer, unsigned int *rl_buffer_size, - int *is_over_subscription) + int *is_over_subscription, + int xnack_conflict) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; @@ -105,7 +110,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, if (WARN_ON(pm->allocated)) return -EINVAL; - pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription, + xnack_conflict); mutex_lock(&pm->lock); @@ -142,11 +148,27 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct queue *q; struct kernel_queue *kq; int is_over_subscription; + int xnack_enabled = -1; + bool xnack_conflict = 0; rl_wptr = retval = processes_mapped = 0; + /* Check if processes set different xnack modes */ + list_for_each_entry(cur, queues, list) { + qpd = cur->qpd; + if (xnack_enabled < 0) + /* First process */ + xnack_enabled = qpd->pqm->process->xnack_enabled; + else if (qpd->pqm->process->xnack_enabled != xnack_enabled) { + /* Found a process with a different xnack mode */ + xnack_conflict = 1; + break; + } + } + retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, - &alloc_size_bytes, &is_over_subscription); + &alloc_size_bytes, &is_over_subscription, + xnack_conflict); if (retval) return retval; @@ -156,9 +178,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm, dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->active_queue_count); +build_runlist_ib: /* build the run list ib packet */ list_for_each_entry(cur, queues, list) { qpd = cur->qpd; + /* group processes with the same xnack mode together */ + if (qpd->pqm->process->xnack_enabled != xnack_enabled) + continue; /* build map process packet */ if (processes_mapped >= pm->dqm->processes_count) { dev_dbg(dev, "Not enough space left in runlist IB\n"); @@ -215,18 +241,26 @@ static int pm_create_runlist_ib(struct packet_manager *pm, alloc_size_bytes); } } + if (xnack_conflict) { + /* pick up processes with the other xnack mode */ + xnack_enabled = !xnack_enabled; + xnack_conflict = 0; + goto build_runlist_ib; + } dev_dbg(dev, "Finished map process and queues to runlist\n"); if (is_over_subscription) { if (!pm->is_over_subscription) - dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n", - is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? - " too many processes." : "", - is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? - " too many queues." : "", - is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? - " multiple processes using cooperative launch." : ""); + dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n", + is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? + " too many processes" : "", + is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? + " too many queues" : "", + is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? + " multiple processes using cooperative launch" : "", + is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ? + " xnack on/off processes mixed on gfx9" : ""); retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 2893fd5e5d00..8fa6489b6f5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); - if (adev->enforce_isolation[kfd->node_id]) + if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -102,7 +102,8 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); - if (adev->enforce_isolation[knode->node_id]) + if (adev->enforce_isolation[knode->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; @@ -165,9 +166,9 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? - 1 : min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); + concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] == + AMDGPU_ENFORCE_ISOLATION_ENABLE) ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; @@ -202,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_set_resources__hsa_interface_queue_hiq; packet->bitfields2.vmid_mask = res->vmid_mask; packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN) + packet->bitfields2.enb_xnack_retry_disable_check = 1; packet->bitfields7.oac_mask = res->oac_mask; packet->bitfields8.gds_heap_base = res->gds_heap_base; packet->bitfields8.gds_heap_size = res->gds_heap_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index cd8611401a66..e356a207d03c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -63,7 +63,8 @@ struct pm4_mes_set_resources { struct { uint32_t vmid_mask:16; uint32_t unmap_latency:8; - uint32_t reserved1:5; + uint32_t reserved1:4; + uint32_t enb_xnack_retry_disable_check:1; enum mes_set_resources_queue_type_enum queue_type:3; } bitfields2; uint32_t ordinal2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f6aedf69c644..d221c58dccc3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, int kfd_get_num_events(struct kfd_process *p); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va); + void kfd_signal_vm_fault_event(struct kfd_process_device *pdd, struct kfd_vm_fault_info *info, struct kfd_hsa_memory_exception_data *data); @@ -1581,10 +1583,15 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev); int pm_debugfs_hang_hws(struct packet_manager *pm); int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); +void kfd_debugfs_add_process(struct kfd_process *p); +void kfd_debugfs_remove_process(struct kfd_process *p); + #else static inline void kfd_debugfs_init(void) {} static inline void kfd_debugfs_fini(void) {} +static inline void kfd_debugfs_add_process(struct kfd_process *p) {} +static inline void kfd_debugfs_remove_process(struct kfd_process *p) {} #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7c0c24732481..722ac1662bdc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -900,6 +900,8 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) kfd_procfs_add_sysfs_files(process); kfd_procfs_add_sysfs_counters(process); + kfd_debugfs_add_process(process); + init_waitqueue_head(&process->wait_irq_drain); } out: @@ -1054,6 +1056,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_process(pdd, false); + pr_debug("Releasing pdd (topology id %d, for pid %d)\n", pdd->dev->id, p->lead_thread->pid); kfd_process_device_destroy_cwsr_dgpu(pdd); @@ -1174,6 +1178,7 @@ static void kfd_process_wq_release(struct work_struct *work) dma_fence_signal(ef); kfd_process_remove_sysfs(p); + kfd_debugfs_remove_process(p); kfd_process_kunmap_signal_bo(p); kfd_process_free_outstanding_kfd_bos(p); @@ -1715,6 +1720,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, pdd->pasid = avm->pasid; pdd->drm_file = drm_file; + kfd_smi_event_process(pdd, true); + return 0; err_get_pasid: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7eb370b68159..6d5fa57d4a23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -451,8 +451,15 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("process pid %d DQM create queue type %d failed. ret %d\n", - pqm->process->lead_thread->pid, type, retval); + if ((type == KFD_QUEUE_TYPE_SDMA || + type == KFD_QUEUE_TYPE_SDMA_XGMI || + type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) && + retval == -ENOMEM) + pr_warn("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); + else + pr_err("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); goto err_create_queue; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 4afff7094caf..a65c67cf56ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -402,7 +402,7 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) { u32 vgpr_size = 0x40000; - if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ gfxv == 90500) @@ -462,7 +462,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) if (gfxv == 80002) /* GFX_VERSION_TONGA */ props->eop_buffer_size = 0x8000; - else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */ props->eop_buffer_size = 4096; else if (gfxv >= 80000) props->eop_buffer_size = 4096; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 9b8169761ec5..83d9384ac815 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -163,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep) static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client, unsigned int event) { - uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS); uint64_t events = READ_ONCE(client->events); - if (pid && client->pid != pid && !(client->suser && (events & all))) + if (pid && client->pid != pid && !client->suser) return false; return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event); @@ -345,6 +344,27 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, pid, address, last - address + 1, node->id, trigger)); } +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start) +{ + struct amdgpu_task_info *task_info; + struct amdgpu_vm *avm; + + if (!pdd->drm_priv) + return; + + avm = drm_priv_to_vm(pdd->drm_priv); + task_info = amdgpu_vm_get_task_info_vm(avm); + + if (task_info) { + kfd_smi_event_add(0, pdd->dev, + start ? KFD_SMI_EVENT_PROCESS_START : + KFD_SMI_EVENT_PROCESS_END, + KFD_EVENT_FMT_PROCESS(task_info->pid, + task_info->task_name)); + amdgpu_vm_put_task_info(task_info); + } +} + int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) { struct kfd_smi_client *client; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index 503bff13d815..bb4d72b57387 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -53,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, unsigned long address, unsigned long last, uint32_t trigger); +void kfd_smi_event_process(struct kfd_process_device *pdd, bool start); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 100717a98ec1..72be6e152e88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1245,8 +1245,7 @@ svm_range_get_pte_flags(struct kfd_node *node, case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; + mtype_local = AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9bbee484d57c..baa2374acdeb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, { struct kfd_node *gpu = outbound_link->gpu; struct amdgpu_device *adev = gpu->adev; - int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu); + unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu); + uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1; + uint32_t xgmi_sdma_eng_id_mask = + ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && - kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && - (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) && + num_xgmi_nodes == 8); if (support_rec_eng) { int src_socket_id = adev->gmc.xgmi.physical_node_id; int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0; outbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift); inbound_link->rec_sdma_eng_id_mask = - 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift); + + /* If recommended engine is out of range, need to reset the mask */ + if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask) + inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask; + } else { - int num_sdma_eng = kfd_get_num_sdma_engines(gpu); - int i, eng_offset = 0; + uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask : + sdma_eng_id_mask; - if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && - kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { - eng_offset = num_sdma_eng; - num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); - } - - for (i = 0; i < num_sdma_eng; i++) { - outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); - } + outbound_link->rec_sdma_eng_id_mask = engine_mask; + inbound_link->rec_sdma_eng_id_mask = engine_mask; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index ab2a97e354da..7329b8cc2576 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -38,6 +38,7 @@ AMDGPUDM = \ amdgpu_dm_pp_smu.o \ amdgpu_dm_psr.o \ amdgpu_dm_replay.o \ + amdgpu_dm_quirks.o \ amdgpu_dm_wb.o ifdef CONFIG_DRM_AMD_DC_FP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a187cdb43e7e..742b10881112 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -80,7 +80,6 @@ #include #include #include -#include #include #include @@ -115,6 +114,8 @@ #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" +static_assert(AMDGPU_DMUB_NOTIFICATION_MAX == DMUB_NOTIFICATION_MAX, "AMDGPU_DMUB_NOTIFICATION_MAX mismatch"); + #define FIRMWARE_RENOIR_DMUB "amdgpu/renoir_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB); #define FIRMWARE_SIENNA_CICHLID_DMUB "amdgpu/sienna_cichlid_dmcub.bin" @@ -280,7 +281,7 @@ static u32 dm_vblank_get_counter(struct amdgpu_device *adev, int crtc) acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -301,7 +302,7 @@ static int dm_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, acrtc = adev->mode_info.crtcs[crtc]; if (!acrtc->dm_irq_params.stream) { - DRM_ERROR("dc_stream_state is NULL for crtc '%d'!\n", + drm_err(adev_to_drm(adev), "dc_stream_state is NULL for crtc '%d'!\n", crtc); return 0; } @@ -757,6 +758,29 @@ static void dmub_aux_setconfig_callback(struct amdgpu_device *adev, complete(&adev->dm.dmub_aux_transfer_done); } +static void dmub_aux_fused_io_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + if (!adev || !notify) { + ASSERT(false); + return; + } + + const struct dmub_cmd_fused_request *req = ¬ify->fused_request; + const uint8_t ddc_line = req->u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(adev->dm.fused_io)) { + ASSERT(false); + return; + } + + struct fused_io_sync *sync = &adev->dm.fused_io[ddc_line]; + + static_assert(sizeof(*req) <= sizeof(sync->reply_data), "Size mismatch"); + memcpy(sync->reply_data, req, sizeof(*req)); + complete(&sync->replied); +} + /** * dmub_hpd_callback - DMUB HPD interrupt processing callback. * @adev: amdgpu_device pointer @@ -780,18 +804,18 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; if (notify == NULL) { - DRM_ERROR("DMUB HPD callback notification was NULL"); + drm_err(adev_to_drm(adev), "DMUB HPD callback notification was NULL"); return; } if (notify->link_index > adev->dm.dc->link_count) { - DRM_ERROR("DMUB HPD index (%u)is abnormal", notify->link_index); + drm_err(adev_to_drm(adev), "DMUB HPD index (%u)is abnormal", notify->link_index); return; } /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */ if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { - DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + drm_info(adev_to_drm(adev), "Skip DMUB HPD IRQ callback in suspend/resume\n"); return; } @@ -808,11 +832,11 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, aconnector = to_amdgpu_dm_connector(connector); if (link && aconnector->dc_link == link) { if (notify->type == DMUB_NOTIFICATION_HPD) - DRM_INFO("DMUB HPD IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD IRQ callback: link_index=%u\n", link_index); else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) - DRM_INFO("DMUB HPD RX IRQ callback: link_index=%u\n", link_index); + drm_info(adev_to_drm(adev), "DMUB HPD RX IRQ callback: link_index=%u\n", link_index); else - DRM_WARN("DMUB Unknown HPD callback type %d, link_index=%u\n", + drm_warn(adev_to_drm(adev), "DMUB Unknown HPD callback type %d, link_index=%u\n", notify->type, link_index); hpd_aconnector = aconnector; @@ -824,7 +848,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, if (hpd_aconnector) { if (notify->type == DMUB_NOTIFICATION_HPD) { if (hpd_aconnector->dc_link->hpd_status == (notify->hpd_status == DP_HPD_PLUG)) - DRM_WARN("DMUB reported hpd status unchanged. link_index=%u\n", link_index); + drm_warn(adev_to_drm(adev), "DMUB reported hpd status unchanged. link_index=%u\n", link_index); handle_hpd_irq_helper(hpd_aconnector); } else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ) { handle_hpd_rx_irq(hpd_aconnector); @@ -843,7 +867,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, static void dmub_hpd_sense_callback(struct amdgpu_device *adev, struct dmub_notification *notify) { - DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n"); + drm_dbg_driver(adev_to_drm(adev), "DMUB HPD SENSE callback.\n"); } /** @@ -879,7 +903,7 @@ static void dm_handle_hpd_work(struct work_struct *work) dmub_hpd_wrk = container_of(work, struct dmub_hpd_work, handle_hpd_work); if (!dmub_hpd_wrk->dmub_notify) { - DRM_ERROR("dmub_hpd_wrk dmub_notify is NULL"); + drm_err(adev_to_drm(dmub_hpd_wrk->adev), "dmub_hpd_wrk dmub_notify is NULL"); return; } @@ -893,6 +917,30 @@ static void dm_handle_hpd_work(struct work_struct *work) } +static const char *dmub_notification_type_str(enum dmub_notification_type e) +{ + switch (e) { + case DMUB_NOTIFICATION_NO_DATA: + return "NO_DATA"; + case DMUB_NOTIFICATION_AUX_REPLY: + return "AUX_REPLY"; + case DMUB_NOTIFICATION_HPD: + return "HPD"; + case DMUB_NOTIFICATION_HPD_IRQ: + return "HPD_IRQ"; + case DMUB_NOTIFICATION_SET_CONFIG_REPLY: + return "SET_CONFIG_REPLY"; + case DMUB_NOTIFICATION_DPIA_NOTIFICATION: + return "DPIA_NOTIFICATION"; + case DMUB_NOTIFICATION_HPD_SENSE_NOTIFY: + return "HPD_SENSE_NOTIFY"; + case DMUB_NOTIFICATION_FUSED_IO: + return "FUSED_IO"; + default: + return ""; + } +} + #define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt @@ -910,22 +958,13 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) struct dmcub_trace_buf_entry entry = { 0 }; u32 count = 0; struct dmub_hpd_work *dmub_hpd_wrk; - static const char *const event_type[] = { - "NO_DATA", - "AUX_REPLY", - "HPD", - "HPD_IRQ", - "SET_CONFIGC_REPLY", - "DPIA_NOTIFICATION", - "HPD_SENSE_NOTIFY", - }; do { if (dc_dmub_srv_get_dmub_outbox0_msg(dm->dc, &entry)) { trace_amdgpu_dmub_trace_high_irq(entry.trace_code, entry.tick_count, entry.param0, entry.param1); - DRM_DEBUG_DRIVER("trace_code:%u, tick_count:%u, param0:%u, param1:%u\n", + drm_dbg_driver(adev_to_drm(adev), "trace_code:%u, tick_count:%u, param0:%u, param1:%u\n", entry.trace_code, entry.tick_count, entry.param0, entry.param1); } else break; @@ -935,7 +974,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) } while (count <= DMUB_TRACE_MAX_READ); if (count > DMUB_TRACE_MAX_READ) - DRM_DEBUG_DRIVER("Warning : count > DMUB_TRACE_MAX_READ"); + drm_dbg_driver(adev_to_drm(adev), "Warning : count > DMUB_TRACE_MAX_READ"); if (dc_enable_dmub_notifications(adev->dm.dc) && irq_params->irq_src == DC_IRQ_SOURCE_DMCUB_OUTBOX) { @@ -943,25 +982,25 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { - DRM_ERROR("DM: notify type %d invalid!", notify.type); + drm_err(adev_to_drm(adev), "DM: notify type %d invalid!", notify.type); continue; } if (!dm->dmub_callback[notify.type]) { - DRM_WARN("DMUB notification skipped due to no handler: type=%s\n", - event_type[notify.type]); + drm_warn(adev_to_drm(adev), "DMUB notification skipped due to no handler: type=%s\n", + dmub_notification_type_str(notify.type)); continue; } if (dm->dmub_thread_offload[notify.type] == true) { dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC); if (!dmub_hpd_wrk) { - DRM_ERROR("Failed to allocate dmub_hpd_wrk"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk"); return; } dmub_hpd_wrk->dmub_notify = kmemdup(¬ify, sizeof(struct dmub_notification), GFP_ATOMIC); if (!dmub_hpd_wrk->dmub_notify) { kfree(dmub_hpd_wrk); - DRM_ERROR("Failed to allocate dmub_hpd_wrk->dmub_notify"); + drm_err(adev_to_drm(adev), "Failed to allocate dmub_hpd_wrk->dmub_notify"); return; } INIT_WORK(&dmub_hpd_wrk->handle_hpd_work, dm_handle_hpd_work); @@ -1019,10 +1058,10 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) &compressor->gpu_addr, &compressor->cpu_addr); if (r) - DRM_ERROR("DM: Failed to initialize FBC\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize FBC\n"); else { adev->dm.dc->ctx->fbc_gpu_addr = compressor->gpu_addr; - DRM_INFO("DM: FBC alloc %lu\n", max_size*4); + drm_info(adev_to_drm(adev), "DM: FBC alloc %lu\n", max_size*4); } } @@ -1187,13 +1226,13 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; if (!fb_info) { - DRM_ERROR("No framebuffer info for DMUB service.\n"); + drm_err(adev_to_drm(adev), "No framebuffer info for DMUB service.\n"); return -EINVAL; } if (!dmub_fw) { /* Firmware required for DMUB support. */ - DRM_ERROR("No firmware provided for DMUB.\n"); + drm_err(adev_to_drm(adev), "No firmware provided for DMUB.\n"); return -EINVAL; } @@ -1203,19 +1242,19 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_has_hw_support(dmub_srv, &has_hw_support); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error checking HW support for DMUB: %d\n", status); + drm_err(adev_to_drm(adev), "Error checking HW support for DMUB: %d\n", status); return -EINVAL; } if (!has_hw_support) { - DRM_INFO("DMUB unsupported on ASIC\n"); + drm_info(adev_to_drm(adev), "DMUB unsupported on ASIC\n"); return 0; } /* Reset DMCUB if it was previously running - before we overwrite its memory. */ status = dmub_srv_hw_reset(dmub_srv); if (status != DMUB_STATUS_OK) - DRM_WARN("Error resetting DMUB HW: %d\n", status); + drm_warn(adev_to_drm(adev), "Error resetting DMUB HW: %d\n", status); hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; @@ -1298,6 +1337,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) case IP_VERSION(3, 5, 1): case IP_VERSION(3, 6, 0): hw_params.ips_sequential_ono = adev->external_rev_id > 0x10; + hw_params.lower_hbr3_phy_ssc = true; break; default: break; @@ -1305,14 +1345,14 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) status = dmub_srv_hw_init(dmub_srv, &hw_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error initializing DMUB HW: %d\n", status); + drm_err(adev_to_drm(adev), "Error initializing DMUB HW: %d\n", status); return -EINVAL; } /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); /* Init DMCU and ABM if available. */ if (dmcu && abm) { @@ -1323,11 +1363,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) if (!adev->dm.dc->ctx->dmub_srv) adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv); if (!adev->dm.dc->ctx->dmub_srv) { - DRM_ERROR("Couldn't allocate DC DMUB server!\n"); + drm_err(adev_to_drm(adev), "Couldn't allocate DC DMUB server!\n"); return -ENOMEM; } - DRM_INFO("DMUB hardware initialized: version=0x%08X\n", + drm_info(adev_to_drm(adev), "DMUB hardware initialized: version=0x%08X\n", adev->dm.dmcub_fw_version); /* Keeping sanity checks off if @@ -1370,18 +1410,18 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev) status = dmub_srv_is_hw_init(dmub_srv, &init); if (status != DMUB_STATUS_OK) - DRM_WARN("DMUB hardware init check failed: %d\n", status); + drm_warn(adev_to_drm(adev), "DMUB hardware init check failed: %d\n", status); if (status == DMUB_STATUS_OK && init) { /* Wait for firmware load to finish. */ status = dmub_srv_wait_for_auto_load(dmub_srv, 100000); if (status != DMUB_STATUS_OK) - DRM_WARN("Wait for DMUB auto-load failed: %d\n", status); + drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status); } else { /* Perform the full hardware initialization. */ r = dm_dmub_hw_init(adev); if (r) - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); } } @@ -1491,18 +1531,18 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work) offload_work = container_of(work, struct hpd_rx_irq_offload_work, work); aconnector = offload_work->offload_wq->aconnector; + adev = offload_work->adev; if (!aconnector) { - DRM_ERROR("Can't retrieve aconnector in hpd_rx_irq_offload_work"); + drm_err(adev_to_drm(adev), "Can't retrieve aconnector in hpd_rx_irq_offload_work"); goto skip; } - adev = drm_to_adev(aconnector->base.dev); dc_link = aconnector->dc_link; mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); mutex_unlock(&aconnector->hpd_lock); if (new_connection_type == dc_connection_none) @@ -1571,8 +1611,9 @@ skip: } -static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct dc *dc) +static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct amdgpu_device *adev) { + struct dc *dc = adev->dm.dc; int max_caps = dc->caps.max_links; int i = 0; struct hpd_rx_irq_offload_work_queue *hpd_rx_offload_wq = NULL; @@ -1588,7 +1629,7 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct create_singlethread_workqueue("amdgpu_dm_hpd_rx_offload_wq"); if (hpd_rx_offload_wq[i].wq == NULL) { - DRM_ERROR("create amdgpu_dm_hpd_rx_offload_wq fail!"); + drm_err(adev_to_drm(adev), "create amdgpu_dm_hpd_rx_offload_wq fail!"); goto out_err; } @@ -1637,153 +1678,6 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } -struct amdgpu_dm_quirks { - bool aux_hpd_discon; - bool support_edp0_on_dp1; -}; - -static struct amdgpu_dm_quirks quirk_entries = { - .aux_hpd_discon = false, - .support_edp0_on_dp1 = false -}; - -static int edp0_on_dp1_callback(const struct dmi_system_id *id) -{ - quirk_entries.support_edp0_on_dp1 = true; - return 0; -} - -static int aux_hpd_discon_callback(const struct dmi_system_id *id) -{ - quirk_entries.aux_hpd_discon = true; - return 0; -} - -static const struct dmi_system_id dmi_quirk_table[] = { - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), - }, - }, - { - .callback = aux_hpd_discon_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), - }, - }, - { - .callback = edp0_on_dp1_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), - }, - }, - { - .callback = edp0_on_dp1_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"), - }, - }, - { - .callback = edp0_on_dp1_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), - }, - }, - { - .callback = edp0_on_dp1_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"), - }, - }, - { - .callback = edp0_on_dp1_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"), - }, - }, - {} - /* TODO: refactor this from a fixed table to a dynamic option */ -}; - -static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_data *init_data) -{ - int dmi_id; - struct drm_device *dev = dm->ddev; - - dm->aux_hpd_discon_quirk = false; - init_data->flags.support_edp0_on_dp1 = false; - - dmi_id = dmi_check_system(dmi_quirk_table); - - if (!dmi_id) - return; - - if (quirk_entries.aux_hpd_discon) { - dm->aux_hpd_discon_quirk = true; - drm_info(dev, "aux_hpd_discon_quirk attached\n"); - } - if (quirk_entries.support_edp0_on_dp1) { - init_data->flags.support_edp0_on_dp1 = true; - drm_info(dev, "support_edp0_on_dp1 attached\n"); - } -} void* dm_allocate_gpu_mem( @@ -1959,7 +1853,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) mutex_init(&adev->dm.audio_lock); if (amdgpu_dm_irq_init(adev)) { - DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize DM IRQ support.\n"); goto error; } @@ -2070,7 +1964,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; - retrieve_dmi_info(&adev->dm, &init_data); + retrieve_dmi_info(&adev->dm); + if (adev->dm.edp0_on_dp1_quirk) + init_data.flags.support_edp0_on_dp1 = true; if (adev->dm.bb_from_dmub) init_data.bb_from_dmub = adev->dm.bb_from_dmub; @@ -2081,10 +1977,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc = dc_create(&init_data); if (adev->dm.dc) { - DRM_INFO("Display Core v%s initialized on %s\n", DC_VER, + drm_info(adev_to_drm(adev), "Display Core v%s initialized on %s\n", DC_VER, dce_version_to_string(adev->dm.dc->ctx->dce_version)); } else { - DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER); + drm_info(adev_to_drm(adev), "Display Core failed to initialize with v%s!\n", DC_VER); goto error; } @@ -2118,25 +2014,31 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.using_dml21 = true; } + if (amdgpu_dc_debug_mask & DC_HDCP_LC_FORCE_FW_ENABLE) + adev->dm.dc->debug.hdcp_lc_force_fw_enable = true; + + if (amdgpu_dc_debug_mask & DC_HDCP_LC_ENABLE_SW_FALLBACK) + adev->dm.dc->debug.hdcp_lc_enable_sw_fallback = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ adev->dm.dc->debug.ignore_cable_id = true; if (adev->dm.dc->caps.dp_hdmi21_pcon_support) - DRM_INFO("DP-HDMI FRL PCON supported\n"); + drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n"); r = dm_dmub_hw_init(adev); if (r) { - DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); + drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); goto error; } dc_hardware_init(adev->dm.dc); - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev->dm.dc); + adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { - DRM_ERROR("amdgpu: failed to create hpd rx offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd rx offload workqueue.\n"); goto error; } @@ -2151,10 +2053,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize freesync_module.\n"); } else - DRM_DEBUG_DRIVER("amdgpu: freesync_module init done %p.\n", + drm_dbg_driver(adev_to_drm(adev), "amdgpu: freesync_module init done %p.\n", adev->dm.freesync_module); amdgpu_dm_init_color_mod(); @@ -2163,7 +2065,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.vblank_control_workqueue = create_singlethread_workqueue("dm_vblank_control_workqueue"); if (!adev->dm.vblank_control_workqueue) - DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize vblank_workqueue.\n"); } if (adev->dm.dc->caps.ips_support && @@ -2174,9 +2076,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc); if (!adev->dm.hdcp_workqueue) - DRM_ERROR("amdgpu: failed to initialize hdcp_workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize hdcp_workqueue.\n"); else - DRM_DEBUG_DRIVER("amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); + drm_dbg_driver(adev_to_drm(adev), "amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); dc_init_callbacks(adev->dm.dc, &init_params); } @@ -2184,20 +2086,29 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { - DRM_INFO("amdgpu: fail to allocate adev->dm.dmub_notify"); + drm_info(adev_to_drm(adev), "amdgpu: fail to allocate adev->dm.dmub_notify"); goto error; } adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq"); if (!adev->dm.delayed_hpd_wq) { - DRM_ERROR("amdgpu: failed to create hpd offload workqueue.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd offload workqueue.\n"); goto error; } amdgpu_dm_outbox_init(adev); if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY, dmub_aux_setconfig_callback, false)) { - DRM_ERROR("amdgpu: fail to register dmub aux callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub aux callback"); + goto error; + } + + for (size_t i = 0; i < ARRAY_SIZE(adev->dm.fused_io); i++) + init_completion(&adev->dm.fused_io[i].replied); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO, + dmub_aux_fused_io_callback, false)) { + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub fused io callback"); goto error; } /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. @@ -2214,7 +2125,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } if (amdgpu_dm_initialize_drm_device(adev)) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize sw for display support.\n"); goto error; } @@ -2229,7 +2140,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size; if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize sw for display support.\n"); goto error; } @@ -2237,14 +2148,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) amdgpu_dm_crtc_secure_display_create_contexts(adev); if (!adev->dm.secure_display_ctx.crtc_ctx) - DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to initialize secure display contexts.\n"); if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) adev->dm.secure_display_ctx.support_mul_roi = true; #endif - DRM_DEBUG_DRIVER("KMS initialized.\n"); + drm_dbg_driver(adev_to_drm(adev), "KMS initialized.\n"); return 0; error: @@ -2417,7 +2328,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) default: break; } - DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); + drm_err(adev_to_drm(adev), "Unsupported ASIC type: 0x%X\n", adev->asic_type); return -EINVAL; } @@ -2435,7 +2346,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } if (r) { - dev_err(adev->dev, "amdgpu_dm: Can't validate firmware \"%s\"\n", + drm_err(adev_to_drm(adev), "amdgpu_dm: Can't validate firmware \"%s\"\n", fw_name_dmcu); amdgpu_ucode_release(&adev->dm.fw_dmcu); return r; @@ -2560,7 +2471,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->inst_const_bytes), PAGE_SIZE); - DRM_INFO("Loading DMUB firmware via PSP: version=0x%08X\n", + drm_info(adev_to_drm(adev), "Loading DMUB firmware via PSP: version=0x%08X\n", adev->dm.dmcub_fw_version); } @@ -2569,7 +2480,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) dmub_srv = adev->dm.dmub_srv; if (!dmub_srv) { - DRM_ERROR("Failed to allocate DMUB service!\n"); + drm_err(adev_to_drm(adev), "Failed to allocate DMUB service!\n"); return -ENOMEM; } @@ -2582,7 +2493,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) /* Create the DMUB service. */ status = dmub_srv_create(dmub_srv, &create_params); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error creating DMUB service: %d\n", status); + drm_err(adev_to_drm(adev), "Error creating DMUB service: %d\n", status); return -EINVAL; } @@ -2607,7 +2518,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) ®ion_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB region info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB region info: %d\n", status); return -EINVAL; } @@ -2636,14 +2547,14 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) fb_info = adev->dm.dmub_fb_info; if (!fb_info) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "Failed to allocate framebuffer info for DMUB service!\n"); return -ENOMEM; } status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info); if (status != DMUB_STATUS_OK) { - DRM_ERROR("Error calculating DMUB FB info: %d\n", status); + drm_err(adev_to_drm(adev), "Error calculating DMUB FB info: %d\n", status); return -EINVAL; } @@ -2660,7 +2571,7 @@ static int dm_sw_init(struct amdgpu_ip_block *ip_block) adev->dm.cgs_device = amdgpu_cgs_create_device(adev); if (!adev->dm.cgs_device) { - DRM_ERROR("amdgpu: failed to create cgs device.\n"); + drm_err(adev_to_drm(adev), "amdgpu: failed to create cgs device.\n"); return -EINVAL; } @@ -2966,7 +2877,7 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) ret = amdgpu_dpm_write_watermarks_table(adev); if (ret) { - DRM_ERROR("Failed to update WMTABLE!\n"); + drm_err(adev_to_drm(adev), "Failed to update WMTABLE!\n"); return ret; } @@ -2984,13 +2895,13 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) if (oem_ddc_service) { oem_i2c = create_i2c(oem_ddc_service, true); if (!oem_i2c) { - dev_info(adev->dev, "Failed to create oem i2c adapter data\n"); + drm_info(adev_to_drm(adev), "Failed to create oem i2c adapter data\n"); return -ENOMEM; } r = i2c_add_adapter(&oem_i2c->base); if (r) { - dev_info(adev->dev, "Failed to register oem i2c\n"); + drm_info(adev_to_drm(adev), "Failed to register oem i2c\n"); kfree(oem_i2c); return r; } @@ -3033,7 +2944,7 @@ static int dm_hw_init(struct amdgpu_ip_block *ip_block) r = dm_oem_i2c_hw_init(adev); if (r) - dev_info(adev->dev, "Failed to add OEM i2c bus\n"); + drm_info(adev_to_drm(adev), "Failed to add OEM i2c bus\n"); return 0; } @@ -3076,7 +2987,7 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; if (rc) - DRM_WARN("Failed to %s pflip interrupts\n", + drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); if (enable) { @@ -3086,14 +2997,14 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); if (rc) - DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; /* During gpu-reset we disable and then enable vblank irq, so * don't use amdgpu_irq_get/put() to avoid refcount change. */ if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); + drm_warn(adev_to_drm(adev), "Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); } } @@ -3923,20 +3834,21 @@ static void handle_hpd_irq(void *param) } -static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq, +static void schedule_hpd_rx_offload_work(struct amdgpu_device *adev, struct hpd_rx_irq_offload_work_queue *offload_wq, union hpd_irq_data hpd_irq_data) { struct hpd_rx_irq_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); if (!offload_work) { - DRM_ERROR("Failed to allocate hpd_rx_irq_offload_work.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate hpd_rx_irq_offload_work.\n"); return; } INIT_WORK(&offload_work->work, dm_handle_hpd_rx_offload_work); offload_work->data = hpd_irq_data; offload_work->offload_wq = offload_wq; + offload_work->adev = adev; queue_work(offload_wq->wq, &offload_work->work); DRM_DEBUG_KMS("queue work to handle hpd_rx offload work"); @@ -3978,7 +3890,7 @@ static void handle_hpd_rx_irq(void *param) goto out; if (hpd_irq_data.bytes.device_service_irq.bits.AUTOMATED_TEST) { - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4000,7 +3912,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4017,7 +3929,7 @@ static void handle_hpd_rx_irq(void *param) spin_unlock(&offload_wq->offload_lock); if (!skip) - schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data); + schedule_hpd_rx_offload_work(adev, offload_wq, hpd_irq_data); goto out; } @@ -4027,7 +3939,7 @@ out: if (result && !is_mst_root_connector) { /* Downstream Port status changed. */ if (!dc_link_detect_connection_type(dc_link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(dc_link); @@ -4090,19 +4002,19 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) { if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, dmub_hpd_sense_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd sense callback"); return -EINVAL; } } @@ -4123,7 +4035,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1 || int_params.irq_source > DC_IRQ_SOURCE_HPD6) { - DRM_ERROR("Failed to register hpd irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd irq!\n"); return -EINVAL; } @@ -4141,7 +4053,7 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_HPD1RX || int_params.irq_source > DC_IRQ_SOURCE_HPD6RX) { - DRM_ERROR("Failed to register hpd rx irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register hpd rx irq!\n"); return -EINVAL; } @@ -4183,7 +4095,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4194,7 +4106,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4213,7 +4125,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4224,7 +4136,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4242,7 +4154,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4284,7 +4196,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4295,7 +4207,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4313,7 +4225,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) for (i = VISLANDS30_IV_SRCID_D1_V_UPDATE_INT; i <= VISLANDS30_IV_SRCID_D6_V_UPDATE_INT; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4324,7 +4236,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4343,7 +4255,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) i <= VISLANDS30_IV_SRCID_D6_GRPH_PFLIP; i += 2) { r = amdgpu_irq_add_id(adev, client_id, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4354,7 +4266,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4372,7 +4284,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, client_id, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4422,7 +4334,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->crtc_irq); if (r) { - DRM_ERROR("Failed to add crtc irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add crtc irq id!\n"); return r; } @@ -4433,7 +4345,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VBLANK1 || int_params.irq_source > DC_IRQ_SOURCE_VBLANK6) { - DRM_ERROR("Failed to register vblank irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vblank irq!\n"); return -EINVAL; } @@ -4454,7 +4366,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) vrtl_int_srcid[i], &adev->vline0_irq); if (r) { - DRM_ERROR("Failed to add vline0 irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vline0 irq id!\n"); return r; } @@ -4465,7 +4377,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_DC1_VLINE0 || int_params.irq_source > DC_IRQ_SOURCE_DC6_VLINE0) { - DRM_ERROR("Failed to register vline0 irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vline0 irq!\n"); return -EINVAL; } @@ -4493,7 +4405,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); if (r) { - DRM_ERROR("Failed to add vupdate irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add vupdate irq id!\n"); return r; } @@ -4504,7 +4416,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_VUPDATE1 || int_params.irq_source > DC_IRQ_SOURCE_VUPDATE6) { - DRM_ERROR("Failed to register vupdate irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register vupdate irq!\n"); return -EINVAL; } @@ -4524,7 +4436,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) i++) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq); if (r) { - DRM_ERROR("Failed to add page flip irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add page flip irq id!\n"); return r; } @@ -4535,7 +4447,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) if (int_params.irq_source == DC_IRQ_SOURCE_INVALID || int_params.irq_source < DC_IRQ_SOURCE_PFLIP_FIRST || int_params.irq_source > DC_IRQ_SOURCE_PFLIP_LAST) { - DRM_ERROR("Failed to register pflip irq!\n"); + drm_err(adev_to_drm(adev), "Failed to register pflip irq!\n"); return -EINVAL; } @@ -4553,7 +4465,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DC_HPD1_INT, &adev->hpd_irq); if (r) { - DRM_ERROR("Failed to add hpd irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add hpd irq id!\n"); return r; } @@ -4575,7 +4487,7 @@ static int register_outbox_irq_handlers(struct amdgpu_device *adev) r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, DCN_1_0__SRCID__DMCUB_OUTBOX_LOW_PRIORITY_READY_INT, &adev->dmub_outbox_irq); if (r) { - DRM_ERROR("Failed to add outbox irq id!\n"); + drm_err(adev_to_drm(adev), "Failed to add outbox irq id!\n"); return r; } @@ -4807,41 +4719,54 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, - uint32_t brightness) +static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, + uint32_t *brightness) { - unsigned int min, max; u8 prev_signal = 0, prev_lum = 0; + int i = 0; - if (!get_brightness_range(caps, &min, &max)) - return brightness; + if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) + return; - for (int i = 0; i < caps->data_points; i++) { - u8 signal, lum; + if (!caps->data_points) + return; - if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) - break; - - signal = caps->luminance_data[i].input_signal; - lum = caps->luminance_data[i].luminance; + /* choose start to run less interpolation steps */ + if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + i = caps->data_points/2; + do { + u8 signal = caps->luminance_data[i].input_signal; + u8 lum = caps->luminance_data[i].luminance; /* * brightness == signal: luminance is percent numerator * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (brightness < signal) - lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (brightness - prev_signal), - signal - prev_signal); - else if (brightness > signal) { + if (*brightness > signal) { prev_signal = signal; prev_lum = lum; + i++; continue; } - brightness = DIV_ROUND_CLOSEST(lum * brightness, 101); - break; - } + if (*brightness < signal) + lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * + (*brightness - prev_signal), + signal - prev_signal); + *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + return; + } while (i < caps->data_points); +} + +static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, + uint32_t brightness) +{ + unsigned int min, max; + + if (!get_brightness_range(caps, &min, &max)) + return brightness; + + convert_custom_brightness(caps, &brightness); // Rescale 0..255 to min..max return min + DIV_ROUND_CLOSEST((max - min) * brightness, @@ -5020,10 +4945,10 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { - DRM_ERROR("DM: Backlight registration failed!\n"); + drm_err(drm, "DM: Backlight registration failed!\n"); dm->backlight_dev[aconnector->bl_idx] = NULL; } else - DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name); + drm_dbg_driver(drm, "DM: Registered Backlight device: %s\n", bl_name); } static int initialize_plane(struct amdgpu_display_manager *dm, @@ -5037,7 +4962,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL); if (!plane) { - DRM_ERROR("KMS: Failed to allocate plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to allocate plane\n"); return -ENOMEM; } plane->type = plane_type; @@ -5055,7 +4980,7 @@ static int initialize_plane(struct amdgpu_display_manager *dm, ret = amdgpu_dm_plane_init(dm, plane, possible_crtcs, plane_cap); if (ret) { - DRM_ERROR("KMS: Failed to initialize plane\n"); + drm_err(adev_to_drm(dm->adev), "KMS: Failed to initialize plane\n"); kfree(plane); return ret; } @@ -5124,14 +5049,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { - DRM_ERROR("DM: Failed to initialize mode config\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize mode config\n"); return -EINVAL; } /* There is one primary plane per CRTC */ primary_planes = dm->dc->caps.max_streams; if (primary_planes > AMDGPU_MAX_PLANES) { - DRM_ERROR("DM: Plane nums out of 6 planes\n"); + drm_err(adev_to_drm(adev), "DM: Plane nums out of 6 planes\n"); return -EINVAL; } @@ -5144,7 +5069,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, mode_info, i, DRM_PLANE_TYPE_PRIMARY, plane)) { - DRM_ERROR("KMS: Failed to initialize primary plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize primary plane\n"); goto fail; } } @@ -5176,14 +5101,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (initialize_plane(dm, NULL, primary_planes + i, DRM_PLANE_TYPE_OVERLAY, plane)) { - DRM_ERROR("KMS: Failed to initialize overlay plane\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize overlay plane\n"); goto fail; } } for (i = 0; i < dm->dc->caps.max_streams; i++) if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) { - DRM_ERROR("KMS: Failed to initialize crtc\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize crtc\n"); goto fail; } @@ -5203,7 +5128,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (register_outbox_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5253,7 +5178,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } if (link_cnt > MAX_LINKS) { - DRM_ERROR( + drm_err(adev_to_drm(adev), "KMS: Cannot support more than %d display indexes\n", MAX_LINKS); goto fail; @@ -5269,12 +5194,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) struct amdgpu_dm_wb_connector *wbcon = kzalloc(sizeof(*wbcon), GFP_KERNEL); if (!wbcon) { - DRM_ERROR("KMS: Failed to allocate writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to allocate writeback connector\n"); continue; } if (amdgpu_dm_wb_connector_init(dm, wbcon, i)) { - DRM_ERROR("KMS: Failed to initialize writeback connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize writeback connector\n"); kfree(wbcon); continue; } @@ -5294,12 +5219,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; if (amdgpu_dm_encoder_init(dm->ddev, aencoder, i)) { - DRM_ERROR("KMS: Failed to initialize encoder\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize encoder\n"); goto fail; } if (amdgpu_dm_connector_init(dm, aconnector, i, aencoder)) { - DRM_ERROR("KMS: Failed to initialize connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to initialize connector\n"); goto fail; } @@ -5308,7 +5233,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) aconnector; if (!dc_link_detect_connection_type(link, &new_connection_type)) - DRM_ERROR("KMS: Failed to detect connector\n"); + drm_err(adev_to_drm(adev), "KMS: Failed to detect connector\n"); if (aconnector->base.force && new_connection_type == dc_connection_none) { emulated_link_detect(link); @@ -5330,8 +5255,15 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (amdgpu_dm_set_replay_caps(link, aconnector)) psr_feature_enabled = false; - if (psr_feature_enabled) + if (psr_feature_enabled) { amdgpu_dm_set_psr_caps(link); + drm_info(adev_to_drm(adev), "PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", + link->psr_settings.psr_feature_enabled, + link->psr_settings.psr_version, + link->dpcd_caps.psr_info.psr_version, + link->dpcd_caps.psr_info.psr_dpcd_caps.raw, + link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); + } } } amdgpu_set_panel_orientation(&aconnector->base); @@ -5345,7 +5277,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VERDE: case CHIP_OLAND: if (dce60_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5367,7 +5299,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_VEGA12: case CHIP_VEGA20: if (dce110_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; @@ -5395,12 +5327,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case IP_VERSION(3, 6, 0): case IP_VERSION(4, 0, 1): if (dcn10_register_irq_handlers(dm->adev)) { - DRM_ERROR("DM: Failed to initialize IRQ\n"); + drm_err(adev_to_drm(adev), "DM: Failed to initialize IRQ\n"); goto fail; } break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%X\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%X\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); goto fail; } @@ -5561,7 +5493,7 @@ static int dm_early_init(struct amdgpu_ip_block *ip_block) /* if there is no object header, skip DM */ if (!amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; - dev_info(adev->dev, "No object header, skipping DM\n"); + drm_info(adev_to_drm(adev), "No object header, skipping DM\n"); return -ENOENT; } @@ -5673,7 +5605,7 @@ static int dm_early_init(struct amdgpu_ip_block *ip_block) adev->mode_info.num_dig = 4; break; default: - DRM_ERROR("Unsupported DCE IP versions: 0x%x\n", + drm_err(adev_to_drm(adev), "Unsupported DCE IP versions: 0x%x\n", amdgpu_ip_version(adev, DCE_HWIP, 0)); return -EINVAL; } @@ -5822,7 +5754,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; break; default: - DRM_ERROR( + drm_err(adev_to_drm(adev), "Unsupported screen format %p4cc\n", &fb->format->format); return -EINVAL; @@ -6343,6 +6275,7 @@ static void fill_stream_properties_from_drm_display_mode( struct amdgpu_dm_connector *aconnector = NULL; struct hdmi_vendor_infoframe hv_frame; struct hdmi_avi_infoframe avi_frame; + ssize_t err; if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) aconnector = to_amdgpu_dm_connector(connector); @@ -6389,9 +6322,17 @@ static void fill_stream_properties_from_drm_display_mode( } if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { - drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in); + err = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd \n", connector->name, err); timing_out->vic = avi_frame.video_code; - drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); + err = drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, + (struct drm_connector *)connector, + mode_in); + if (err < 0) + drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd \n", connector->name, err); timing_out->hdmi_vic = hv_frame.vic; } @@ -6516,7 +6457,7 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } static struct dc_sink * -create_fake_sink(struct dc_link *link) +create_fake_sink(struct drm_device *dev, struct dc_link *link) { struct dc_sink_init_data sink_init_data = { 0 }; struct dc_sink *sink = NULL; @@ -6526,7 +6467,7 @@ create_fake_sink(struct dc_link *link) sink = dc_sink_create(&sink_init_data); if (!sink) { - DRM_ERROR("Failed to create sink!\n"); + drm_err(dev, "Failed to create sink!\n"); return NULL; } sink->sink_signal = SIGNAL_TYPE_VIRTUAL; @@ -6659,7 +6600,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, m_pref = list_first_entry_or_null( &aconnector->base.modes, struct drm_display_mode, head); if (!m_pref) { - DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); + drm_dbg_driver(aconnector->base.dev, "No preferred mode found in EDID\n"); return NULL; } } @@ -6834,7 +6775,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from SST RX\n", + drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { @@ -6854,7 +6795,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n", + drm_dbg_driver(drm_connector->dev, "%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n", __func__, drm_connector->name); } } @@ -6882,6 +6823,7 @@ create_stream_for_sink(struct drm_connector *connector, const struct dc_stream_state *old_stream, int requested_bpc) { + struct drm_device *dev = connector->dev; struct amdgpu_dm_connector *aconnector = NULL; struct drm_display_mode *preferred_mode = NULL; const struct drm_connector_state *con_state = &dm_state->base; @@ -6904,11 +6846,6 @@ create_stream_for_sink(struct drm_connector *connector, drm_mode_init(&mode, drm_mode); memset(&saved_mode, 0, sizeof(saved_mode)); - if (connector == NULL) { - DRM_ERROR("connector is NULL!\n"); - return stream; - } - if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK) { aconnector = NULL; aconnector = to_amdgpu_dm_connector(connector); @@ -6923,7 +6860,7 @@ create_stream_for_sink(struct drm_connector *connector, } if (!aconnector || !aconnector->dc_sink) { - sink = create_fake_sink(link); + sink = create_fake_sink(dev, link); if (!sink) return stream; @@ -6935,7 +6872,7 @@ create_stream_for_sink(struct drm_connector *connector, stream = dc_create_stream_for_sink(sink); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(dev, "Failed to create stream for sink!\n"); goto finish; } @@ -6967,7 +6904,7 @@ create_stream_for_sink(struct drm_connector *connector, * case, we call set mode ourselves to restore the previous mode * and the modelist may not be filled in time. */ - DRM_DEBUG_DRIVER("No preferred mode found\n"); + drm_dbg_driver(dev, "No preferred mode found\n"); } else if (aconnector) { recalculate_timing = amdgpu_freesync_vid_mode && is_freesync_video_mode(&mode, aconnector); @@ -7417,6 +7354,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; struct i2c_adapter *ddc; + struct drm_device *dev = connector->dev; if (dc_link && dc_link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; @@ -7426,7 +7364,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { - DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7485,7 +7423,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { - DRM_ERROR("No EDID found on connector: %s.\n", connector->name); + drm_err(connector->dev, "No EDID found on connector: %s.\n", connector->name); return; } @@ -7619,7 +7557,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, dm_state, old_stream, requested_bpc); if (stream == NULL) { - DRM_ERROR("Failed to create stream for sink!\n"); + drm_err(adev_to_drm(adev), "Failed to create stream for sink!\n"); break; } @@ -7694,7 +7632,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec if (dc_sink == NULL && aconnector->base.force != DRM_FORCE_ON_DIGITAL && aconnector->base.force != DRM_FORCE_ON) { - DRM_ERROR("dc_sink is NULL!\n"); + drm_err(connector->dev, "dc_sink is NULL!\n"); goto fail; } @@ -8602,7 +8540,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, i2c = create_i2c(link->ddc, false); if (!i2c) { - DRM_ERROR("Failed to create i2c adapter data\n"); + drm_err(adev_to_drm(dm->adev), "Failed to create i2c adapter data\n"); return -ENOMEM; } @@ -8610,7 +8548,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, res = i2c_add_adapter(&i2c->base); if (res) { - DRM_ERROR("Failed to register hw i2c %d\n", link->link_index); + drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index); goto out_free; } @@ -8624,7 +8562,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, &i2c->base); if (res) { - DRM_ERROR("connector_init failed\n"); + drm_err(adev_to_drm(dm->adev), "connector_init failed\n"); aconnector->connector_id = -1; goto out_free; } @@ -9114,7 +9052,7 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state, */ WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, true) != 0); WARN_ON(drm_crtc_vblank_get(new_state->base.crtc) != 0); - DRM_DEBUG_DRIVER("%s: crtc=%u VRR off->on: Get vblank ref\n", + drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR off->on: Get vblank ref\n", __func__, new_state->base.crtc->base.id); } else if (old_vrr_active && !new_vrr_active) { /* Transition VRR active -> inactive: @@ -9122,7 +9060,7 @@ static void amdgpu_dm_handle_vrr_transition(struct dm_crtc_state *old_state, */ WARN_ON(amdgpu_dm_crtc_set_vupdate_irq(new_state->base.crtc, false) != 0); drm_crtc_vblank_put(new_state->base.crtc); - DRM_DEBUG_DRIVER("%s: crtc=%u VRR on->off: Drop vblank ref\n", + drm_dbg_driver(new_state->base.crtc->dev, "%s: crtc=%u VRR on->off: Drop vblank ref\n", __func__, new_state->base.crtc->base.id); } } @@ -9209,13 +9147,13 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor attributes\n"); update->cursor_attributes = &crtc_state->stream->cursor_attributes; if (!dc_stream_set_cursor_position(crtc_state->stream, &position)) - DRM_ERROR("DC failed to set cursor position\n"); + drm_err(adev_to_drm(adev), "DC failed to set cursor position\n"); update->cursor_position = &crtc_state->stream->cursor_position; } @@ -9466,7 +9404,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates[planes_count].surface = dc_plane; if (!bundle->surface_updates[planes_count].surface) { - DRM_ERROR("No surface for CRTC: id=%d\n", + drm_err(dev, "No surface for CRTC: id=%d\n", acrtc_attach->crtc_id); continue; } @@ -9982,20 +9920,20 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, wb_info = kzalloc(sizeof(*wb_info), GFP_KERNEL); if (!wb_info) { - DRM_ERROR("Failed to allocate wb_info\n"); + drm_err(adev_to_drm(adev), "Failed to allocate wb_info\n"); return; } acrtc = to_amdgpu_crtc(wb_conn->encoder.crtc); if (!acrtc) { - DRM_ERROR("no amdgpu_crtc found\n"); + drm_err(adev_to_drm(adev), "no amdgpu_crtc found\n"); kfree(wb_info); return; } afb = to_amdgpu_framebuffer(new_con_state->writeback_job->fb); if (!afb) { - DRM_ERROR("No amdgpu_framebuffer found\n"); + drm_err(adev_to_drm(adev), "No amdgpu_framebuffer found\n"); kfree(wb_info); return; } @@ -10216,7 +10154,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; - DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); + drm_info(adev_to_drm(adev), "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); if (aconnector->dc_link) hdcp_update_display( @@ -10308,7 +10246,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); if (!dummy_updates) { - DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + drm_err(adev_to_drm(adev), "Failed to allocate memory for dummy_updates.\n"); continue; } for (j = 0; j < status->plane_count; j++) @@ -10516,16 +10454,20 @@ static int dm_force_atomic_commit(struct drm_connector *connector) */ conn_state = drm_atomic_get_connector_state(state, connector); - ret = PTR_ERR_OR_ZERO(conn_state); - if (ret) + /* Check for error in getting connector state */ + if (IS_ERR(conn_state)) { + ret = PTR_ERR(conn_state); goto out; + } /* Attach crtc to drm_atomic_state*/ crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base); - ret = PTR_ERR_OR_ZERO(crtc_state); - if (ret) + /* Check for error in getting crtc state */ + if (IS_ERR(crtc_state)) { + ret = PTR_ERR(crtc_state); goto out; + } /* force a restore */ crtc_state->mode_changed = true; @@ -10533,9 +10475,11 @@ static int dm_force_atomic_commit(struct drm_connector *connector) /* Attach plane to drm_atomic_state */ plane_state = drm_atomic_get_plane_state(state, plane); - ret = PTR_ERR_OR_ZERO(plane_state); - if (ret) + /* Check for error in getting plane state */ + if (IS_ERR(plane_state)) { + ret = PTR_ERR(plane_state); goto out; + } /* Call commit internally with the state we just constructed */ ret = drm_atomic_commit(state); @@ -10543,7 +10487,7 @@ static int dm_force_atomic_commit(struct drm_connector *connector) out: drm_atomic_state_put(state); if (ret) - DRM_ERROR("Restoring old state failed with %i\n", ret); + drm_err(ddev, "Restoring old state failed with %i\n", ret); return ret; } @@ -10627,7 +10571,7 @@ static int do_aquire_global_lock(struct drm_device *dev, &commit->flip_done, 10*HZ); if (ret == 0) - DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n", + drm_err(dev, "[CRTC:%d:%s] hw_done or flip_done timed out\n", crtc->base.id, crtc->name); drm_crtc_commit_put(commit); @@ -10743,6 +10687,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, struct dm_atomic_state *dm_state = NULL; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; struct dc_stream_state *new_stream; + struct amdgpu_device *adev = dm->adev; int ret = 0; /* @@ -10772,8 +10717,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, drm_old_conn_state = drm_atomic_get_old_connector_state(state, connector); - if (IS_ERR(drm_new_conn_state)) { - ret = PTR_ERR_OR_ZERO(drm_new_conn_state); + if (WARN_ON(!drm_new_conn_state)) { + ret = -EINVAL; goto fail; } @@ -10796,7 +10741,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, */ if (!new_stream) { - DRM_DEBUG_DRIVER("%s: Failed to create new stream for crtc %d\n", + drm_dbg_driver(adev_to_drm(adev), "%s: Failed to create new stream for crtc %d\n", __func__, acrtc->base.base.id); ret = -ENOMEM; goto fail; @@ -10834,7 +10779,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { new_crtc_state->mode_changed = false; - DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d", + drm_dbg_driver(adev_to_drm(adev), "Mode change not required, setting mode_changed to %d", new_crtc_state->mode_changed); } } @@ -10872,7 +10817,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) { new_crtc_state->mode_changed = false; - DRM_DEBUG_DRIVER( + drm_dbg_driver(adev_to_drm(adev), "Mode change not required for front porch change, setting mode_changed to %d", new_crtc_state->mode_changed); @@ -10893,7 +10838,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (ret) goto fail; - DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n", + drm_dbg_driver(adev_to_drm(adev), "Disabling DRM crtc: %d\n", crtc->base.id); /* i.e. reset mode */ @@ -11746,7 +11691,7 @@ static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev, old_plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(new_plane_state) || IS_ERR(old_plane_state)) { - DRM_ERROR("Failed to get plane state for plane %s\n", plane->name); + drm_err(dev, "Failed to get plane state for plane %s\n", plane->name); return false; } @@ -12315,7 +12260,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, res = dc_wake_and_execute_dmub_cmd(dm->dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY); if (!res) { - DRM_ERROR("EDID CEA parser failed\n"); + drm_err(adev_to_drm(dm->adev), "EDID CEA parser failed\n"); return false; } @@ -12323,7 +12268,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, if (output->type == DMUB_CMD__EDID_CEA_ACK) { if (!output->ack.success) { - DRM_ERROR("EDID CEA ack failed at offset %d\n", + drm_err(adev_to_drm(dm->adev), "EDID CEA ack failed at offset %d\n", output->ack.offset); } } else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) { @@ -12335,7 +12280,7 @@ static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate; vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate; } else { - DRM_WARN("Unknown EDID CEA parser results\n"); + drm_warn(adev_to_drm(dm->adev), "Unknown EDID CEA parser results\n"); return false; } @@ -12551,7 +12496,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, enum adaptive_sync_type as_type = ADAPTIVE_SYNC_TYPE_NONE; if (!connector->state) { - DRM_ERROR("%s - Connector has no state", __func__); + drm_err(adev_to_drm(adev), "%s - Connector has no state", __func__); goto update; } @@ -12736,7 +12681,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( } if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); *operation_result = AUX_RET_ERROR_TIMEOUT; goto out; } @@ -12747,11 +12692,11 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * lead to this error. We can ignore this for now. */ if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { - DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", + drm_warn(adev_to_drm(adev), "DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); } - *operation_result = AUX_RET_ERROR_INVALID_REPLY; + *operation_result = p_notify->result; goto out; } @@ -12774,6 +12719,79 @@ out: return ret; } +static void abort_fused_io( + struct dc_context *ctx, + const struct dmub_cmd_fused_request *request +) +{ + union dmub_rb_cmd command = { 0 }; + struct dmub_rb_cmd_fused_io *io = &command.fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_ABORT; + io->header.payload_bytes = sizeof(*io) - sizeof(io->header); + io->request = *request; + dm_execute_dmub_cmd(ctx, &command, DM_DMUB_WAIT_TYPE_NO_WAIT); +} + +static bool execute_fused_io( + struct amdgpu_device *dev, + struct dc_context *ctx, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +) +{ + const uint8_t ddc_line = commands[0].fused_io.request.u.aux.ddc_line; + + if (ddc_line >= ARRAY_SIZE(dev->dm.fused_io)) + return false; + + struct fused_io_sync *sync = &dev->dm.fused_io[ddc_line]; + struct dmub_rb_cmd_fused_io *first = &commands[0].fused_io; + const bool result = dm_execute_dmub_cmd_list(ctx, count, commands, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + && first->header.ret_status + && first->request.status == FUSED_REQUEST_STATUS_SUCCESS; + + if (!result) + return false; + + while (wait_for_completion_timeout(&sync->replied, usecs_to_jiffies(timeout_us))) { + reinit_completion(&sync->replied); + + struct dmub_cmd_fused_request *reply = (struct dmub_cmd_fused_request *) sync->reply_data; + + static_assert(sizeof(*reply) <= sizeof(sync->reply_data), "Size mismatch"); + + if (reply->identifier == first->request.identifier) { + first->request = *reply; + return true; + } + } + + reinit_completion(&sync->replied); + first->request.status = FUSED_REQUEST_STATUS_TIMEOUT; + abort_fused_io(ctx, &first->request); + return false; +} + +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us) +{ + struct amdgpu_display_manager *dm = &dev->dm; + + mutex_lock(&dm->dpia_aux_lock); + + const bool result = execute_fused_io(dev, link->ctx, commands, count, timeout_us); + + mutex_unlock(&dm->dpia_aux_lock); + return result; +} + int amdgpu_dm_process_dmub_set_config_sync( struct dc_context *ctx, unsigned int link_index, @@ -12792,7 +12810,7 @@ int amdgpu_dm_process_dmub_set_config_sync( ret = 0; *operation_result = adev->dm.dmub_notify->sc_status; } else { - DRM_ERROR("wait_for_completion_timeout timeout!"); + drm_err(adev_to_drm(adev), "wait_for_completion_timeout timeout!"); ret = -1; *operation_result = SET_CONFIG_UNKNOWN_ERROR; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 385faaca6e26..d7d92f9911e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 7 +#define AMDGPU_DMUB_NOTIFICATION_MAX 8 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 @@ -81,6 +81,7 @@ struct amdgpu_bo; struct dmub_srv; struct dc_plane_state; struct dmub_notification; +struct dmub_cmd_fused_request; struct amd_vsdb_block { unsigned char ieee_id[3]; @@ -276,6 +277,10 @@ struct hpd_rx_irq_offload_work { * @offload_wq: offload work queue that this work is queued to */ struct hpd_rx_irq_offload_work_queue *offload_wq; + /** + * @adev: amdgpu_device pointer + */ + struct amdgpu_device *adev; }; /** @@ -613,6 +618,13 @@ struct amdgpu_display_manager { */ bool aux_hpd_discon_quirk; + /** + * @edp0_on_dp1_quirk: + * + * quirk for platforms that put edp0 on DP1. + */ + bool edp0_on_dp1_quirk; + /** * @dpia_aux_lock: * @@ -633,6 +645,16 @@ struct amdgpu_display_manager { * OEM i2c bus */ struct amdgpu_i2c_adapter *oem_i2c; + + /** + * @fused_io: + * + * dmub fused io interface + */ + struct fused_io_sync { + struct completion replied; + char reply_data[0x40]; // Cannot include dmub_cmd here + } fused_io[8]; }; enum dsc_clock_force_state { @@ -1012,6 +1034,14 @@ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, struct aux_payload *payload, enum aux_return_code_type *operation_result); +bool amdgpu_dm_execute_fused_io( + struct amdgpu_device *dev, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, struct set_config_cmd_payload *payload, enum set_config_status *operation_result); @@ -1045,4 +1075,6 @@ void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector); void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector); int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector); +void retrieve_dmi_info(struct amdgpu_display_manager *dm); + #endif /* __AMDGPU_DM_H__ */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 8f22ad966543..c16962256514 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -26,6 +26,7 @@ #include "amdgpu_dm_hdcp.h" #include "amdgpu.h" #include "amdgpu_dm.h" +#include "dc_fused_io.h" #include "dm_helpers.h" #include #include "hdcp_psp.h" @@ -76,6 +77,34 @@ lp_read_dpcd(void *handle, uint32_t address, uint8_t *data, uint32_t size) return dm_helpers_dp_read_dpcd(link->ctx, link, address, data, size); } +static bool lp_atomic_write_poll_read_i2c( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_i2c(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + +static bool lp_atomic_write_poll_read_aux( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + struct dc_link *link = handle; + + return dm_atomic_write_poll_read_aux(link, write, poll, read, poll_timeout_us, poll_mask_msb); +} + static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint32_t *srm_size) { struct ta_hdcp_shared_memory *hdcp_cmd; @@ -732,7 +761,10 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, INIT_DELAYED_WORK(&hdcp_work[i].watchdog_timer_dwork, event_watchdog_timer); INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate); - hdcp_work[i].hdcp.config.psp.handle = &adev->psp; + struct mod_hdcp_config *config = &hdcp_work[i].hdcp.config; + struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; + + config->psp.handle = &adev->psp; if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == DCN_VERSION_3_15 || @@ -740,12 +772,22 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, dc->ctx->dce_version == DCN_VERSION_3_51 || dc->ctx->dce_version == DCN_VERSION_3_6 || dc->ctx->dce_version == DCN_VERSION_3_16) - hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1; - hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i); - hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c; - hdcp_work[i].hdcp.config.ddc.funcs.write_dpcd = lp_write_dpcd; - hdcp_work[i].hdcp.config.ddc.funcs.read_dpcd = lp_read_dpcd; + config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); + + ddc_funcs->write_i2c = lp_write_i2c; + ddc_funcs->read_i2c = lp_read_i2c; + ddc_funcs->write_dpcd = lp_write_dpcd; + ddc_funcs->read_dpcd = lp_read_dpcd; + + config->debug.lc_enable_sw_fallback = dc->debug.hdcp_lc_enable_sw_fallback; + if (dc->caps.fused_io_supported || dc->debug.hdcp_lc_force_fw_enable) { + ddc_funcs->atomic_write_poll_read_i2c = lp_atomic_write_poll_read_i2c; + ddc_funcs->atomic_write_poll_read_aux = lp_atomic_write_poll_read_aux; + } else { + ddc_funcs->atomic_write_poll_read_i2c = NULL; + ddc_funcs->atomic_write_poll_read_aux = NULL; + } memset(hdcp_work[i].aconnector, 0, sizeof(struct amdgpu_dm_connector *) * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 1395a748d726..d4395b92fb85 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -630,6 +630,19 @@ bool dm_helpers_submit_i2c( return result; } +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +) +{ + struct amdgpu_device *dev = ctx->driver_context; + + return amdgpu_dm_execute_fused_io(dev, link, commands, count, timeout_us); +} + static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, bool is_write_cmd, unsigned char cmd, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5cdbc86ef8f5..25e8befbcc47 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1739,16 +1739,17 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, struct dc_dsc_bw_range *bw_range) { struct dc_dsc_policy dsc_policy = {0}; + bool is_dsc_possible; dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); - dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], - stream->sink->ctx->dc->debug.dsc_min_slice_height_override, - dsc_policy.min_target_bpp * 16, - dsc_policy.max_target_bpp * 16, - &stream->sink->dsc_caps.dsc_dec_caps, - &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); + is_dsc_possible = dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], + stream->sink->ctx->dc->debug.dsc_min_slice_height_override, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, + &stream->sink->dsc_caps.dsc_dec_caps, + &stream->timing, dc_link_get_highest_encoding_format(stream->link), bw_range); - return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; + return is_dsc_possible; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 3e0f45f1711c..b7c6e8d13435 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -948,13 +948,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index e140b7a04d72..f984cb0cb889 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -87,14 +87,6 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) link->psr_settings.psr_feature_enabled = true; } - - DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d DPCD caps 0x%x su_y_granularity %d\n", - link->psr_settings.psr_feature_enabled, - link->psr_settings.psr_version, - link->dpcd_caps.psr_info.psr_version, - link->dpcd_caps.psr_info.psr_dpcd_caps.raw, - link->dpcd_caps.psr_info.psr2_su_y_granularity_cap); - } /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c new file mode 100644 index 000000000000..1da07ebf9217 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_quirks.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include + +#include "amdgpu.h" +#include "amdgpu_dm.h" + +struct amdgpu_dm_quirks { + bool aux_hpd_discon; + bool support_edp0_on_dp1; +}; + +static struct amdgpu_dm_quirks quirk_entries = { + .aux_hpd_discon = false, + .support_edp0_on_dp1 = false +}; + +static int edp0_on_dp1_callback(const struct dmi_system_id *id) +{ + quirk_entries.support_edp0_on_dp1 = true; + return 0; +} + +static int aux_hpd_discon_callback(const struct dmi_system_id *id) +{ + quirk_entries.aux_hpd_discon = true; + return 0; +} + +static const struct dmi_system_id dmi_quirk_table[] = { + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .callback = aux_hpd_discon_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"), + }, + }, + {} + /* TODO: refactor this from a fixed table to a dynamic option */ +}; + +void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + struct drm_device *dev = dm->ddev; + int dmi_id; + + dm->aux_hpd_discon_quirk = false; + dm->edp0_on_dp1_quirk = false; + + dmi_id = dmi_check_system(dmi_quirk_table); + + if (!dmi_id) + return; + + if (quirk_entries.aux_hpd_discon) { + dm->aux_hpd_discon_quirk = true; + drm_info(dev, "aux_hpd_discon_quirk attached\n"); + } + if (quirk_entries.support_edp0_on_dp1) { + dm->edp0_on_dp1_quirk = true; + drm_info(dev, "support_edp0_on_dp1 attached\n"); + } +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 0d5fefb0f591..d9527c05fc87 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -102,13 +102,13 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector r = amdgpu_bo_reserve(rbo, true); if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + drm_err(adev_to_drm(adev), "fail to reserve bo (%d)\n", r); return r; } r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + drm_err(adev_to_drm(adev), "reserving fence slot failed (%d)\n", r); goto error_unlock; } diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3e1f5b689718..3c9ecea7eebc 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -53,31 +53,30 @@ DC_LIBS += hdcp ifdef CONFIG_DRM_AMD_DC_FP DC_LIBS += sspl -DC_SPL_TRANS += dc_spl_translate.o +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, dc_spl_translate.o) endif AMD_DC = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/dc/,$(DC_LIBS))) include $(AMD_DC) -DISPLAY_CORE = dc.o dc_stat.o dc_resource.o dc_hw_sequencer.o dc_sink.o \ -dc_surface.o dc_debug.o dc_stream.o dc_link_enc_cfg.o dc_link_exports.o dc_state.o +FILES = +FILES += dc_dmub_srv.o +FILES += dc_edid_parser.o +FILES += dc_fused_io.o +FILES += dc_helper.o +FILES += core/dc.o +FILES += core/dc_debug.o +FILES += core/dc_hw_sequencer.o +FILES += core/dc_link_enc_cfg.o +FILES += core/dc_link_exports.o +FILES += core/dc_resource.o +FILES += core/dc_sink.o +FILES += core/dc_stat.o +FILES += core/dc_state.o +FILES += core/dc_stream.o +FILES += core/dc_surface.o +FILES += core/dc_vm_helper.o -DISPLAY_CORE += dc_vm_helper.o +AMD_DISPLAY_FILES += $(addprefix $(AMDDALPATH)/dc/, $(FILES)) -AMD_DISPLAY_CORE = $(addprefix $(AMDDALPATH)/dc/core/,$(DISPLAY_CORE)) - -AMD_DM_REG_UPDATE = $(addprefix $(AMDDALPATH)/dc/,dc_helper.o) - -AMD_DC_SPL_TRANS = $(addprefix $(AMDDALPATH)/dc/,$(DC_SPL_TRANS)) - -AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE) -AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE) - -DC_DMUB += dc_dmub_srv.o -DC_EDID += dc_edid_parser.o -AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB)) -AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID)) -AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID) - -AMD_DISPLAY_FILES += $(AMD_DC_SPL_TRANS) diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 88d3f9d7dd55..452206b5095e 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64( { unsigned long long result; - ASSERT(divisor); - result = div64_u64_rem(dividend, divisor, remainder); return result; @@ -213,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) * @note * Good idea to use Newton's method */ - - ASSERT(arg.value); - return dc_fixpt_from_fraction( dc_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 3bacf470f7c5..67f08495b7e6 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2384,10 +2384,10 @@ static enum bp_result get_integrated_info_v8( } /* - * get_integrated_info_v8 + * get_integrated_info_v9 * * @brief - * Get V8 integrated BIOS information + * Get V9 integrated BIOS information * * @param * bios_parser *bp - [in]BIOS parser handler to get master data table diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index 19897fa52e7e..d82a52319088 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ -142,17 +142,3 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di return actual_dispclk_set_mhz * 1000; } - -int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) -{ - int actual_dprefclk_set_mhz = -1; - - actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param( - clk_mgr, - VBIOSSMC_MSG_SetDprefclkFreq, - khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); - - /* TODO: add code for programing DP DTO, currently this is down by command table */ - - return actual_dprefclk_set_mhz * 1000; -} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h index 083cb3158859..81d7c912549c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h @@ -27,6 +27,5 @@ #define DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); -int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr); #endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 23b390245b5d..5a633333dbb5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -164,20 +164,6 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis return actual_dispclk_set_mhz * 1000; } -int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) -{ - int actual_dprefclk_set_mhz = -1; - - actual_dprefclk_set_mhz = rn_vbios_smu_send_msg_with_param( - clk_mgr, - VBIOSSMC_MSG_SetDprefclkFreq, - khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); - - /* TODO: add code for programing DP DTO, currently this is down by command table */ - - return actual_dprefclk_set_mhz * 1000; -} - int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz) { int actual_dcfclk_set_mhz = -1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h index 1ce19d875358..f76fad87f0e1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h @@ -30,7 +30,6 @@ enum dcn_pwr_state; int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz); -int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr); int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz); int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz); void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 2d14346b680e..478b4d6a3544 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -49,12 +49,9 @@ static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E0000 { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } }, { { 0, 0, 0, 0, 0, 0 } } } }; -static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0x0241B000, 0x04040000 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0 } } } }; + +#define CTX clk_mgr->base.ctx +#define IND_REG(offset) offset #define regBIF_BX_PF2_RSMU_INDEX 0x0000 #define regBIF_BX_PF2_RSMU_INDEX_BASE_IDX 1 @@ -67,9 +64,6 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define FN(reg_name, field) \ FD(reg_name##__##field) -#define REG_NBIO(reg_name) \ - (NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name) - #undef DC_LOGGER #define DC_LOGGER \ CTX->logger @@ -77,6 +71,13 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define mmMP1_C2PMSG_3 0x3B1050C +#define reg__MP1_C2PMSG_3_MASK (0xFFFFFFFF) +#define reg__MP1_C2PMSG_3__SHIFT (0) + + +#define data_reg_name__MP1_C2PMSG_3_MASK (0xFFFFFFFF) +#define data_reg_name__MP1_C2PMSG_3__SHIFT (0) + #define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team #define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version #define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0 @@ -153,12 +154,10 @@ static int dcn315_smu_send_msg_with_param( for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) { /* Trigger the message transaction by writing the message ID */ - generic_write_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3, msg_id); - read_back_data = generic_read_indirect_reg(CTX, - REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA), - mmMP1_C2PMSG_3); + IX_REG_SET_SYNC(mmMP1_C2PMSG_3, 0, + MP1_C2PMSG_3, msg_id); + IX_REG_GET_SYNC(mmMP1_C2PMSG_3, + MP1_C2PMSG_3, &read_back_data); if (read_back_data == msg_id) break; udelay(2); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c index 6a6ae618650b..4607eff07253 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -65,6 +65,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B04B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 142de8938d7c..bb1ac12a2b09 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -90,6 +90,7 @@ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1 #define mmCLK5_spll_field_8 0x1B24B +#define mmCLK6_spll_field_8 0x1B24B #define mmDENTIST_DISPCLK_CNTL 0x0124 #define regDENTIST_DISPCLK_CNTL 0x0064 #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 @@ -116,6 +117,7 @@ #define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L #define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +#define CLK6_spll_field_8__spll_ssc_en_MASK 0x00002000L #define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 #undef FN @@ -596,7 +598,11 @@ static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) uint32_t ssc_enable; - ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + if (clk_mgr_base->ctx->dce_version == DCN_VERSION_3_51) { + ssc_enable = REG_READ(CLK6_spll_field_8) & CLK6_spll_field_8__spll_ssc_en_MASK; + } else { + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; + } return ssc_enable != 0; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c index f6f0e6a33001..604d256cb47a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.c @@ -84,8 +84,8 @@ #define VBIOSSMC_MSG_AllowZstatesEntry 0x15 #define VBIOSSMC_MSG_DisallowZstatesEntry 0x16 #define VBIOSSMC_MSG_SetDtbClk 0x17 -#define VBIOSSMC_MSG_DispPsrEntry 0x18 ///< Display PSR entry, DMU -#define VBIOSSMC_MSG_DispPsrExit 0x19 ///< Display PSR exit, DMU +#define VBIOSSMC_MSG_DispIPS2Entry 0x18 ///< Display IPS2 entry, DMU +#define VBIOSSMC_MSG_DispIPS2Exit 0x19 ///< Display IPS2 exit, DMU #define VBIOSSMC_MSG_DisableLSdma 0x1A ///< Disable LSDMA; only sent by VBIOS #define VBIOSSMC_MSG_DpControllerPhyStatus 0x1B ///< Inform PMFW about the pre conditions for turning SLDO2 on/off . bit[0]==1 precondition is met, bit[1-2] are for DPPHY number #define VBIOSSMC_MSG_QueryIPS2Support 0x1C ///< Return 1: support; else not supported @@ -475,7 +475,7 @@ int dcn35_smu_exit_low_power_state(struct clk_mgr_internal *clk_mgr) retv = dcn35_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_DispPsrExit, + VBIOSSMC_MSG_DispIPS2Exit, 0); smu_print("%s: smu_exit_low_power_state return = %d\n", __func__, retv); return retv; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ba4ce8a63158..56d011a1323c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -36,7 +36,9 @@ #include "resource.h" #include "dc_state.h" #include "dc_state_priv.h" +#include "dc_plane.h" #include "dc_plane_priv.h" +#include "dc_stream_priv.h" #include "gpio_service_interface.h" #include "clk_mgr.h" @@ -1195,6 +1197,12 @@ static void apply_ctx_interdependent_lock(struct dc *dc, static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { + if (dc->debug.visual_confirm & VISUAL_CONFIRM_EXPLICIT) { + memcpy(&pipe_ctx->visual_confirm_color, &pipe_ctx->plane_state->visual_confirm_color, + sizeof(pipe_ctx->visual_confirm_color)); + return; + } + if (dc->ctx->dce_version >= DCN_VERSION_1_0) { memset(&pipe_ctx->visual_confirm_color, 0, sizeof(struct tg_color)); @@ -1228,6 +1236,51 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte } } +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color) +{ + struct dc_stream_status *stream_status = dc_stream_get_status(stream_state); + struct pipe_ctx *pipe_ctx; + int i; + struct dc_plane_state *plane_state = NULL; + + if (!stream_status) + return; + + switch (dc->debug.visual_confirm) { + case VISUAL_CONFIRM_DISABLE: + return; + case VISUAL_CONFIRM_PSR: + case VISUAL_CONFIRM_FAMS: + pipe_ctx = dc_stream_get_pipe_ctx(stream_state); + if (!pipe_ctx) + return; + dc_dmub_srv_get_visual_confirm_color_cmd(dc, pipe_ctx); + memcpy(color, &dc->ctx->dmub_srv->dmub->visual_confirm_color, sizeof(struct tg_color)); + return; + + default: + /* find plane with highest layer_index */ + for (i = 0; i < stream_status->plane_count; i++) { + if (stream_status->plane_states[i]->visible) + plane_state = stream_status->plane_states[i]; + } + if (!plane_state) + return; + /* find pipe that contains plane with highest layer index */ + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state == plane_state) { + memcpy(color, &pipe->visual_confirm_color, sizeof(struct tg_color)); + return; + } + } + } +} + static void disable_dangling_plane(struct dc *dc, struct dc_state *context) { int i, j; @@ -2056,6 +2109,18 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.enable_accelerated_mode(dc, context); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + //Only delay otg master for a given config + if (resource_is_pipe_type(pipe, OTG_MASTER)) { + //dc_commit_state_no_check is always a full update + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, pipe, false); + break; + } + } + } + if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) dc->hwss.prepare_bandwidth(dc, context); @@ -2120,6 +2185,14 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (dc->hwss.program_front_end_for_ctx) { dc->hwss.interdependent_update_lock(dc, context, true); dc->hwss.program_front_end_for_ctx(dc, context); + + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe); + } + } + dc->hwss.interdependent_update_lock(dc, context, false); dc->hwss.post_unlock_program_front_end(dc, context); } @@ -2261,11 +2334,15 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params for (i = 0; i < params->stream_count; i++) { struct dc_stream_state *stream = params->streams[i]; struct dc_stream_status *status = dc_stream_get_status(stream); + struct dc_sink *sink = stream->sink; /* revalidate streams */ - res = dc_validate_stream(dc, stream); - if (res != DC_OK) - return res; + if (!dc_is_virtual_signal(sink->sink_signal)) { + res = dc_validate_stream(dc, stream); + if (res != DC_OK) + return res; + } + dc_stream_log(dc, stream); @@ -2818,7 +2895,7 @@ static enum surface_update_type check_update_surfaces_for_stream( int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; - if (dc->idle_optimizations_allowed) + if (dc->idle_optimizations_allowed || dc_can_clear_cursor_limit(dc)) overall_type = UPDATE_TYPE_FULL; if (stream_status == NULL || stream_status->plane_count != surface_count) @@ -3223,7 +3300,7 @@ static void copy_stream_update_to_stream(struct dc *dc, if (dsc_validate_context) { stream->timing.dsc_cfg = *update->dsc_config; stream->timing.flags.DSC = enable_dsc; - if (!dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true) != DC_OK) { stream->timing.dsc_cfg = old_dsc_cfg; stream->timing.flags.DSC = old_dsc_enabled; update->dsc_config = NULL; @@ -3252,7 +3329,7 @@ static void backup_planes_and_stream_state( return; for (i = 0; i < status->plane_count; i++) { - scratch->plane_states[i] = *status->plane_states[i]; + dc_plane_copy_config(&scratch->plane_states[i], status->plane_states[i]); } scratch->stream_state = *stream; } @@ -3268,10 +3345,7 @@ static void restore_planes_and_stream_state( return; for (i = 0; i < status->plane_count; i++) { - /* refcount will always be valid, restore everything else */ - struct kref refcount = status->plane_states[i]->refcount; - *status->plane_states[i] = scratch->plane_states[i]; - status->plane_states[i]->refcount = refcount; + dc_plane_copy_config(status->plane_states[i], &scratch->plane_states[i]); } *stream = scratch->stream_state; } @@ -3448,7 +3522,7 @@ static bool update_planes_and_stream_state(struct dc *dc, } if (update_type == UPDATE_TYPE_FULL) { - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { BREAK_TO_DEBUGGER(); goto fail; } @@ -4002,6 +4076,7 @@ static void commit_planes_for_stream(struct dc *dc, &context->res_ctx, stream); ASSERT(top_pipe_to_program != NULL); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -4052,6 +4127,9 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.wait_for_dcc_meta_propagation(dc, top_pipe_to_program); } + if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type == UPDATE_TYPE_FAST); + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, should_lock_all_pipes, NULL, subvp_prev_use); @@ -4172,12 +4250,6 @@ static void commit_planes_for_stream(struct dc *dc, if (update_type == UPDATE_TYPE_FAST) continue; - ASSERT(!pipe_ctx->plane_state->triplebuffer_flips); - if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { - /*turn off triple buffer for full update*/ - dc->hwss.program_triplebuffer( - dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips); - } stream_status = stream_get_status(context, pipe_ctx->stream); @@ -4186,8 +4258,37 @@ static void commit_planes_for_stream(struct dc *dc, dc, pipe_ctx->stream, stream_status->plane_count, context); } } + + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (!pipe_ctx->plane_state) + continue; + + /* Full fe update*/ + if (update_type == UPDATE_TYPE_FAST) + continue; + + ASSERT(!pipe_ctx->plane_state->triplebuffer_flips); + if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { + /*turn off triple buffer for full update*/ + dc->hwss.program_triplebuffer( + dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips); + } + } + if (dc->hwss.program_front_end_for_ctx && update_type != UPDATE_TYPE_FAST) { dc->hwss.program_front_end_for_ctx(dc, context); + + //Pipe busy until some frame and line # + if (dc->hwseq->funcs.set_wait_for_update_needed_for_pipe && update_type == UPDATE_TYPE_FULL) { + for (j = 0; j < dc->res_pool->pipe_count; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + dc->hwseq->funcs.set_wait_for_update_needed_for_pipe(dc, pipe_ctx); + } + } + if (dc->debug.validate_dml_output) { for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *cur_pipe = &context->res_ctx.pipe_ctx[i]; @@ -4527,7 +4628,7 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, backup_and_set_minimal_pipe_split_policy(dc, base_context, policy); /* commit minimal state */ - if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false) == DC_OK) { /* prevent underflow and corruption when reconfiguring pipes */ force_vsync_flip_in_minimal_transition_context(minimal_transition_context); } else { @@ -4962,6 +5063,9 @@ static bool full_update_required(struct dc *dc, if (dc->idle_optimizations_allowed) return true; + if (dc_can_clear_cursor_limit(dc)) + return true; + return false; } @@ -5047,7 +5151,7 @@ static bool update_planes_and_stream_v1(struct dc *dc, copy_stream_update_to_stream(dc, context, stream, stream_update); if (update_type >= UPDATE_TYPE_FULL) { - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { DC_ERROR("Mode validation failed for stream update!\n"); dc_state_release(context); return false; @@ -6191,15 +6295,22 @@ bool dc_abm_save_restore( void dc_query_current_properties(struct dc *dc, struct dc_current_properties *properties) { unsigned int i; - bool subvp_sw_cursor_req = false; + unsigned int max_cursor_size = dc->caps.max_cursor_size; + unsigned int stream_cursor_size; - for (i = 0; i < dc->current_state->stream_count; i++) { - if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i]) && !dc->current_state->streams[i]->hw_cursor_req) { - subvp_sw_cursor_req = true; - break; + if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) { + for (i = 0; i < dc->current_state->stream_count; i++) { + stream_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, + dc->current_state, + dc->current_state->streams[i]); + + if (stream_cursor_size < max_cursor_size) { + max_cursor_size = stream_cursor_size; + } } } - properties->cursor_size_limit = subvp_sw_cursor_req ? 64 : dc->caps.max_cursor_size; + + properties->cursor_size_limit = max_cursor_size; } /** @@ -6265,3 +6376,27 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) else return 0; } + +bool dc_is_cursor_limit_pending(struct dc *dc) +{ + uint32_t i; + + for (i = 0; i < dc->current_state->stream_count; i++) { + if (dc_stream_is_cursor_limit_pending(dc, dc->current_state->streams[i])) + return true; + } + + return false; +} + +bool dc_can_clear_cursor_limit(struct dc *dc) +{ + uint32_t i; + + for (i = 0; i < dc->current_state->stream_count; i++) { + if (dc_state_can_clear_stream_cursor_subvp_limit(dc->current_state->streams[i], dc->current_state)) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 650e89825968..7551d0a3fe82 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -266,6 +266,8 @@ char *dc_status_to_str(enum dc_status status) return "Fail dp payload allocation"; case DC_FAIL_DP_LINK_BANDWIDTH: return "Insufficient DP link bandwidth"; + case DC_FAIL_HW_CURSOR_SUPPORT: + return "HW Cursor not supported"; case DC_ERROR_UNEXPECTED: return "Unexpected error"; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 55b32dfbfdd6..7014b8d000bb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -697,7 +697,7 @@ void get_fams2_visual_confirm_color( void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status *stream_status, @@ -896,7 +896,7 @@ void hwss_build_fast_sequence(struct dc *dc, } void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps) { unsigned int i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 313a32248cd7..3da25bd8b578 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1342,32 +1342,6 @@ static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx) data->viewport_c.y += src.y / vpc_div; } -static bool is_subvp_high_refresh_candidate(struct dc_stream_state *stream) -{ - uint32_t refresh_rate; - struct dc *dc = stream->ctx->dc; - - refresh_rate = (stream->timing.pix_clk_100hz * (uint64_t)100 + - stream->timing.v_total * stream->timing.h_total - (uint64_t)1); - refresh_rate = div_u64(refresh_rate, stream->timing.v_total); - refresh_rate = div_u64(refresh_rate, stream->timing.h_total); - - /* If there's any stream that fits the SubVP high refresh criteria, - * we must return true. This is because cursor updates are asynchronous - * with full updates, so we could transition into a SubVP config and - * remain in HW cursor mode if there's no cursor update which will - * then cause corruption. - */ - if ((refresh_rate >= 120 && refresh_rate <= 175 && - stream->timing.v_addressable >= 1080 && - stream->timing.v_addressable <= 2160) && - (dc->current_state->stream_count > 1 || - (dc->current_state->stream_count == 1 && !stream->allow_freesync))) - return true; - - return false; -} - static enum controller_dp_test_pattern convert_dp_to_controller_test_pattern( enum dp_test_pattern test_pattern) { @@ -3937,6 +3911,10 @@ enum dc_status resource_map_pool_resources( if (!dc->link_srv->dp_decide_link_settings(stream, &pipe_ctx->link_config.dp_link_settings)) return DC_FAIL_DP_LINK_BANDWIDTH; + + dc->link_srv->dp_decide_tunnel_settings(stream, + &pipe_ctx->link_config.dp_tunnel_settings); + if (dc->link_srv->dp_get_encoding_format( &pipe_ctx->link_config.dp_link_settings) == DP_128b_132b_ENCODING) { pipe_ctx->stream_res.hpo_dp_stream_enc = @@ -4259,6 +4237,11 @@ enum dc_status dc_validate_with_context(struct dc *dc, } } + /* clear subvp cursor limitations */ + for (i = 0; i < context->stream_count; i++) { + dc_state_set_stream_subvp_cursor_limit(context->streams[i], context, false); + } + res = dc_validate_global_state(dc, context, fast_validate); /* calculate pixel rate divider after deciding pxiel clock & odm combine */ @@ -4385,8 +4368,7 @@ enum dc_status dc_validate_global_state( result = resource_build_scaling_params_for_context(dc, new_ctx); if (result == DC_OK) - if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) - result = DC_FAIL_BANDWIDTH_VALIDATE; + result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate); return result; } @@ -5538,25 +5520,19 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, return DC_OK; } -bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream) -{ - if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream)) - return true; - if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 && - ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) - return true; - else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 1080 && - ((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120) - return true; - - return false; -} - struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx) { return &pipe_ctx->plane_res.scl_data.dscl_prog_data; } +static bool resource_allocate_mcache(struct dc_state *context, const struct dc_mcache_params *mcache_params) +{ + if (context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config) + context->clk_mgr->ctx->dc->res_pool->funcs->program_mcache_pipe_config(context, mcache_params); + + return true; +} + void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options) { dml2_options->callbacks.dc = dc; @@ -5576,6 +5552,7 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio dml2_options->callbacks.get_stream_status = &dc_state_get_stream_status; dml2_options->callbacks.get_stream_from_id = &dc_state_get_stream_from_id; dml2_options->callbacks.get_max_flickerless_instant_vtotal_increase = &dc_stream_get_max_flickerless_instant_vtotal_increase; + dml2_options->callbacks.allocate_mcache = &resource_allocate_mcache; dml2_options->svp_pstate.callbacks.dc = dc; dml2_options->svp_pstate.callbacks.add_phantom_plane = &dc_state_add_phantom_plane; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 1b2cce127981..4db7383720fd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -22,6 +22,7 @@ * Authors: AMD * */ +#include "dc_types.h" #include "core_types.h" #include "core_status.h" #include "dc_state.h" @@ -812,8 +813,12 @@ enum dc_status dc_state_add_phantom_stream(const struct dc *dc, if (phantom_stream_status) { phantom_stream_status->mall_stream_config.type = SUBVP_PHANTOM; phantom_stream_status->mall_stream_config.paired_stream = main_stream; + phantom_stream_status->mall_stream_config.subvp_limit_cursor_size = false; + phantom_stream_status->mall_stream_config.cursor_size_limit_subvp = false; } + dc_state_set_stream_subvp_cursor_limit(main_stream, state, true); + return res; } @@ -939,13 +944,20 @@ void dc_state_release_phantom_streams_and_planes( const struct dc *dc, struct dc_state *state) { + unsigned int phantom_count; + struct dc_stream_state *phantom_streams[MAX_PHANTOM_PIPES]; + struct dc_plane_state *phantom_planes[MAX_PHANTOM_PIPES]; int i; - for (i = 0; i < state->phantom_stream_count; i++) - dc_state_release_phantom_stream(dc, state, state->phantom_streams[i]); + phantom_count = state->phantom_stream_count; + memcpy(phantom_streams, state->phantom_streams, sizeof(struct dc_stream_state *) * MAX_PHANTOM_PIPES); + for (i = 0; i < phantom_count; i++) + dc_state_release_phantom_stream(dc, state, phantom_streams[i]); - for (i = 0; i < state->phantom_plane_count; i++) - dc_state_release_phantom_plane(dc, state, state->phantom_planes[i]); + phantom_count = state->phantom_plane_count; + memcpy(phantom_planes, state->phantom_planes, sizeof(struct dc_plane_state *) * MAX_PHANTOM_PIPES); + for (i = 0; i < phantom_count; i++) + dc_state_release_phantom_plane(dc, state, phantom_planes[i]); } struct dc_stream_state *dc_state_get_stream_from_id(const struct dc_state *state, unsigned int id) @@ -977,3 +989,94 @@ bool dc_state_is_fams2_in_use( return is_fams2_in_use; } + +void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit) +{ + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + stream_status->mall_stream_config.subvp_limit_cursor_size = limit; + } +} + +bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool limit = false; + + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + limit = stream_status->mall_stream_config.subvp_limit_cursor_size; + } + + return limit; +} + +void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit) +{ + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + stream_status->mall_stream_config.cursor_size_limit_subvp = limit; + } +} + +bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool limit = false; + + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + limit = stream_status->mall_stream_config.cursor_size_limit_subvp; + } + + return limit; +} + +bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state) +{ + bool can_clear_limit = false; + + struct dc_stream_status *stream_status; + + stream_status = dc_state_get_stream_status(state, stream); + + if (stream_status) { + can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, state) && + (stream_status->mall_stream_config.type == SUBVP_PHANTOM || + stream->hw_cursor_req || + !stream_status->mall_stream_config.subvp_limit_cursor_size || + !stream->cursor_position.enable || + dc_stream_check_cursor_attributes(stream, state, &stream->cursor_attributes)); + } + + return can_clear_limit; +} + +bool dc_state_is_subvp_in_use(struct dc_state *state) +{ + uint32_t i; + + for (i = 0; i < state->stream_count; i++) { + if (dc_state_get_stream_subvp_type(state, state->streams[i]) != SUBVP_NONE) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 0478dd856d8c..b883fb24fa12 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -265,13 +265,16 @@ void program_cursor_attributes( } /* - * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address + * dc_stream_check_cursor_attributes() - Check validitity of cursor attributes and surface address */ -bool dc_stream_set_cursor_attributes( - struct dc_stream_state *stream, +bool dc_stream_check_cursor_attributes( + const struct dc_stream_state *stream, + struct dc_state *state, const struct dc_cursor_attributes *attributes) { - struct dc *dc; + const struct dc *dc; + + unsigned int max_cursor_size; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -289,24 +292,38 @@ bool dc_stream_set_cursor_attributes( dc = stream->ctx->dc; - /* SubVP is not compatible with HW cursor larger than 64 x 64 x 4. - * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case: - * 1. If the config is a candidate for SubVP high refresh (both single an dual display configs) - * 2. If not subvp high refresh, for single display cases, if resolution is >= 5K and refresh rate < 120hz - * 3. If not subvp high refresh, for multi display cases, if resolution is >= 4K and refresh rate < 120hz + /* SubVP is not compatible with HW cursor larger than what can fit in cursor SRAM. + * Therefore, if cursor is greater than this, fallback to SW cursor. */ - if (dc->debug.allow_sw_cursor_fallback && - attributes->height * attributes->width * 4 > 16384 && - !stream->hw_cursor_req) { - if (check_subvp_sw_cursor_fallback_req(dc, stream)) + if (dc->debug.allow_sw_cursor_fallback && dc->res_pool->funcs->get_max_hw_cursor_size) { + max_cursor_size = dc->res_pool->funcs->get_max_hw_cursor_size(dc, state, stream); + max_cursor_size = max_cursor_size * max_cursor_size * 4; + + if (attributes->height * attributes->width * 4 > max_cursor_size) { return false; + } } - stream->cursor_attributes = *attributes; - return true; } +/* + * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address + */ +bool dc_stream_set_cursor_attributes( + struct dc_stream_state *stream, + const struct dc_cursor_attributes *attributes) +{ + bool result = false; + + if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) { + stream->cursor_attributes = *attributes; + result = true; + } + + return result; +} + bool dc_stream_program_cursor_attributes( struct dc_stream_state *stream, const struct dc_cursor_attributes *attributes) @@ -552,6 +569,14 @@ bool dc_stream_fc_disable_writeback(struct dc *dc, return true; } +/** + * dc_stream_remove_writeback() - Disables writeback and removes writeback info. + * @dc: Display core control structure. + * @stream: Display core stream state. + * @dwb_pipe_inst: Display writeback pipe. + * + * Return: returns true on success, false otherwise. + */ bool dc_stream_remove_writeback(struct dc *dc, struct dc_stream_state *stream, uint32_t dwb_pipe_inst) @@ -1109,3 +1134,26 @@ unsigned int dc_stream_get_max_flickerless_instant_vtotal_increase(struct dc_str return dc_stream_get_max_flickerless_instant_vtotal_delta(stream, is_gaming, false); } + +bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream) +{ + bool is_limit_pending = false; + + if (dc->current_state) + is_limit_pending = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state); + + return is_limit_pending; +} + +bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream) +{ + bool can_clear_limit = false; + + if (dc->current_state) + can_clear_limit = dc_state_get_stream_cursor_subvp_limit(stream, dc->current_state) && + (stream->hw_cursor_req || + !stream->cursor_position.enable || + dc_stream_check_cursor_attributes(stream, dc->current_state, &stream->cursor_attributes)); + + return can_clear_limit; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index e6fcc21bb9bc..922f23557f5d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -109,7 +109,8 @@ struct dc_plane_state *dc_create_plane_state(const struct dc *dc) ***************************************************************************** */ const struct dc_plane_status *dc_plane_get_status( - const struct dc_plane_state *plane_state) + const struct dc_plane_state *plane_state, + union dc_plane_status_update_flags flags) { const struct dc_plane_status *plane_status; struct dc *dc; @@ -136,7 +137,7 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - if (pipe_ctx->plane_state) + if (pipe_ctx->plane_state && flags.bits.address) pipe_ctx->plane_state->status.is_flip_pending = false; break; @@ -151,7 +152,8 @@ const struct dc_plane_status *dc_plane_get_status( if (pipe_ctx->plane_state != plane_state) continue; - dc->hwss.update_pending_status(pipe_ctx); + if (flags.bits.address) + dc->hwss.update_pending_status(pipe_ctx); } return plane_status; @@ -294,3 +296,17 @@ void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state, dc->hwss.clear_surface_dcc_and_tiling(pipe_ctx, plane_state, clear_tiling); } } + +void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src) +{ + struct kref temp_refcount; + + /* backup persistent info */ + memcpy(&temp_refcount, &dst->refcount, sizeof(struct kref)); + + /* copy all configuration information */ + memcpy(dst, src, sizeof(struct dc_plane_state)); + + /* restore persistent info */ + memcpy(&dst->refcount, &temp_refcount, sizeof(struct kref)); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c2ee0526926..1d917be36fc4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.325" +#define DC_VER "3.2.334" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -249,6 +249,7 @@ struct dc_caps { uint32_t i2c_speed_in_khz_hdcp; uint32_t dmdata_alloc_size; unsigned int max_cursor_size; + unsigned int max_buffered_cursor_size; unsigned int max_video_width; /* * max video plane width that can be safely assumed to be always @@ -282,6 +283,7 @@ struct dc_caps { bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; + bool fused_io_supported; uint32_t max_otg_num; uint32_t max_cab_allocation_bytes; uint32_t cache_line_size; @@ -447,6 +449,7 @@ struct dc_config { bool enable_windowed_mpo_odm; bool forceHBR2CP2520; // Used for switching between test patterns TPS4 and CP2520 uint32_t allow_edp_hotplug_detection; + bool skip_riommu_prefetch_wa; bool clamp_min_dcfclk; uint64_t vblank_alignment_dto_params; uint8_t vblank_alignment_max_frame_time_diff; @@ -496,6 +499,7 @@ enum visual_confirm { VISUAL_CONFIRM_HW_CURSOR = 20, VISUAL_CONFIRM_VABC = 21, VISUAL_CONFIRM_DCC = 22, + VISUAL_CONFIRM_EXPLICIT = 0x80000000, }; enum dc_psr_power_opts { @@ -902,6 +906,9 @@ struct dc_debug_options { bool voltage_align_fclk; bool disable_min_fclk; + bool hdcp_lc_force_fw_enable; + bool hdcp_lc_enable_sw_fallback; + bool disable_dfs_bypass; bool disable_dpp_power_gate; bool disable_hubp_power_gate; @@ -1418,6 +1425,171 @@ struct dc_scratch_space { struct dc_stream_state stream_state; }; +/* + * A link contains one or more sinks and their connected status. + * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. + */ + struct dc_link { + struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; + unsigned int sink_count; + struct dc_sink *local_sink; + unsigned int link_index; + enum dc_connection_type type; + enum signal_type connector_signal; + enum dc_irq_source irq_source_hpd; + enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ + enum dc_irq_source irq_source_read_request;/* Read Request */ + + bool is_hpd_filter_disabled; + bool dp_ss_off; + + /** + * @link_state_valid: + * + * If there is no link and local sink, this variable should be set to + * false. Otherwise, it should be set to true; usually, the function + * core_link_enable_stream sets this field to true. + */ + bool link_state_valid; + bool aux_access_disabled; + bool sync_lt_in_progress; + bool skip_stream_reenable; + bool is_internal_display; + /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */ + bool is_dig_mapping_flexible; + bool hpd_status; /* HPD status of link without physical HPD pin. */ + bool is_hpd_pending; /* Indicates a new received hpd */ + + /* USB4 DPIA links skip verifying link cap, instead performing the fallback method + * for every link training. This is incompatible with DP LL compliance automation, + * which expects the same link settings to be used every retry on a link loss. + * This flag is used to skip the fallback when link loss occurs during automation. + */ + bool skip_fallback_on_link_loss; + + bool edp_sink_present; + + struct dp_trace dp_trace; + + /* caps is the same as reported_link_cap. link_traing use + * reported_link_cap. Will clean up. TODO + */ + struct dc_link_settings reported_link_cap; + struct dc_link_settings verified_link_cap; + struct dc_link_settings cur_link_settings; + struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; + struct dc_link_settings preferred_link_setting; + /* preferred_training_settings are override values that + * come from DM. DM is responsible for the memory + * management of the override pointers. + */ + struct dc_link_training_overrides preferred_training_settings; + struct dp_audio_test_data audio_test_data; + + uint8_t ddc_hw_inst; + + uint8_t hpd_src; + + uint8_t link_enc_hw_inst; + /* DIG link encoder ID. Used as index in link encoder resource pool. + * For links with fixed mapping to DIG, this is not changed after dc_link + * object creation. + */ + enum engine_id eng_id; + enum engine_id dpia_preferred_eng_id; + + bool test_pattern_enabled; + /* Pending/Current test pattern are only used to perform and track + * FIXED_VS retimer test pattern/lane adjustment override state. + * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, + * to perform specific lane adjust overrides before setting certain + * PHY test patterns. In cases when lane adjust and set test pattern + * calls are not performed atomically (i.e. performing link training), + * pending_test_pattern will be invalid or contain a non-PHY test pattern + * and current_test_pattern will contain required context for any future + * set pattern/set lane adjust to transition between override state(s). + * */ + enum dp_test_pattern current_test_pattern; + enum dp_test_pattern pending_test_pattern; + + union compliance_test_state compliance_test_state; + + void *priv; + + struct ddc_service *ddc; + + enum dp_panel_mode panel_mode; + bool aux_mode; + + /* Private to DC core */ + + const struct dc *dc; + + struct dc_context *ctx; + + struct panel_cntl *panel_cntl; + struct link_encoder *link_enc; + struct graphics_object_id link_id; + /* Endpoint type distinguishes display endpoints which do not have entries + * in the BIOS connector table from those that do. Helps when tracking link + * encoder to display endpoint assignments. + */ + enum display_endpoint_type ep_type; + union ddi_channel_mapping ddi_channel_mapping; + struct connector_device_tag_info device_tag; + struct dpcd_caps dpcd_caps; + uint32_t dongle_max_pix_clk; + unsigned short chip_caps; + unsigned int dpcd_sink_count; + struct hdcp_caps hdcp_caps; + enum edp_revision edp_revision; + union dpcd_sink_ext_caps dpcd_sink_ext_caps; + + struct psr_settings psr_settings; + struct replay_settings replay_settings; + + /* Drive settings read from integrated info table */ + struct dc_lane_settings bios_forced_drive_settings; + + /* Vendor specific LTTPR workaround variables */ + uint8_t vendor_specific_lttpr_link_rate_wa; + bool apply_vendor_specific_lttpr_link_rate_wa; + + /* MST record stream using this link */ + struct link_flags { + bool dp_keep_receiver_powered; + bool dp_skip_DID2; + bool dp_skip_reset_segment; + bool dp_skip_fs_144hz; + bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ + bool dpia_mst_dsc_always_on; + /* Forced DPIA into TBT3 compatibility mode. */ + bool dpia_forced_tbt3_mode; + bool dongle_mode_timing_override; + bool blank_stream_on_ocs_change; + bool read_dpcd204h_on_irq_hpd; + bool force_dp_ffe_preset; + } wa_flags; + union dc_dp_ffe_preset forced_dp_ffe_preset; + struct link_mst_stream_allocation_table mst_stream_alloc_table; + + struct dc_link_status link_status; + struct dprx_states dprx_states; + + struct gpio *hpd_gpio; + enum dc_link_fec_state fec_state; + bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly + + struct dc_panel_config panel_config; + struct phy_state phy_state; + uint32_t phy_transition_bitmask; + // BW ALLOCATON USB4 ONLY + struct dc_dpia_bw_alloc dpia_bw_alloc_config; + bool skip_implict_edp_power_control; + enum backlight_control_type backlight_control_type; +}; + struct dc { struct dc_debug_options debug; struct dc_versions versions; @@ -1485,6 +1657,7 @@ struct dc { struct dc_scratch_space current_state; struct dc_scratch_space new_state; struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack + struct dc_link temp_link; bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */ } scratch; @@ -1651,170 +1824,6 @@ uint32_t dc_bandwidth_in_kbps_from_timing( const enum dc_link_encoding_format link_encoding); /* Link Interfaces */ -/* - * A link contains one or more sinks and their connected status. - * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. - */ -struct dc_link { - struct dc_sink *remote_sinks[MAX_SINKS_PER_LINK]; - unsigned int sink_count; - struct dc_sink *local_sink; - unsigned int link_index; - enum dc_connection_type type; - enum signal_type connector_signal; - enum dc_irq_source irq_source_hpd; - enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ - - bool is_hpd_filter_disabled; - bool dp_ss_off; - - /** - * @link_state_valid: - * - * If there is no link and local sink, this variable should be set to - * false. Otherwise, it should be set to true; usually, the function - * core_link_enable_stream sets this field to true. - */ - bool link_state_valid; - bool aux_access_disabled; - bool sync_lt_in_progress; - bool skip_stream_reenable; - bool is_internal_display; - /** @todo Rename. Flag an endpoint as having a programmable mapping to a DIG encoder. */ - bool is_dig_mapping_flexible; - bool hpd_status; /* HPD status of link without physical HPD pin. */ - bool is_hpd_pending; /* Indicates a new received hpd */ - - /* USB4 DPIA links skip verifying link cap, instead performing the fallback method - * for every link training. This is incompatible with DP LL compliance automation, - * which expects the same link settings to be used every retry on a link loss. - * This flag is used to skip the fallback when link loss occurs during automation. - */ - bool skip_fallback_on_link_loss; - - bool edp_sink_present; - - struct dp_trace dp_trace; - - /* caps is the same as reported_link_cap. link_traing use - * reported_link_cap. Will clean up. TODO - */ - struct dc_link_settings reported_link_cap; - struct dc_link_settings verified_link_cap; - struct dc_link_settings cur_link_settings; - struct dc_lane_settings cur_lane_setting[LANE_COUNT_DP_MAX]; - struct dc_link_settings preferred_link_setting; - /* preferred_training_settings are override values that - * come from DM. DM is responsible for the memory - * management of the override pointers. - */ - struct dc_link_training_overrides preferred_training_settings; - struct dp_audio_test_data audio_test_data; - - uint8_t ddc_hw_inst; - - uint8_t hpd_src; - - uint8_t link_enc_hw_inst; - /* DIG link encoder ID. Used as index in link encoder resource pool. - * For links with fixed mapping to DIG, this is not changed after dc_link - * object creation. - */ - enum engine_id eng_id; - enum engine_id dpia_preferred_eng_id; - - bool test_pattern_enabled; - /* Pending/Current test pattern are only used to perform and track - * FIXED_VS retimer test pattern/lane adjustment override state. - * Pending allows link HWSS to differentiate PHY vs non-PHY pattern, - * to perform specific lane adjust overrides before setting certain - * PHY test patterns. In cases when lane adjust and set test pattern - * calls are not performed atomically (i.e. performing link training), - * pending_test_pattern will be invalid or contain a non-PHY test pattern - * and current_test_pattern will contain required context for any future - * set pattern/set lane adjust to transition between override state(s). - * */ - enum dp_test_pattern current_test_pattern; - enum dp_test_pattern pending_test_pattern; - - union compliance_test_state compliance_test_state; - - void *priv; - - struct ddc_service *ddc; - - enum dp_panel_mode panel_mode; - bool aux_mode; - - /* Private to DC core */ - - const struct dc *dc; - - struct dc_context *ctx; - - struct panel_cntl *panel_cntl; - struct link_encoder *link_enc; - struct graphics_object_id link_id; - /* Endpoint type distinguishes display endpoints which do not have entries - * in the BIOS connector table from those that do. Helps when tracking link - * encoder to display endpoint assignments. - */ - enum display_endpoint_type ep_type; - union ddi_channel_mapping ddi_channel_mapping; - struct connector_device_tag_info device_tag; - struct dpcd_caps dpcd_caps; - uint32_t dongle_max_pix_clk; - unsigned short chip_caps; - unsigned int dpcd_sink_count; - struct hdcp_caps hdcp_caps; - enum edp_revision edp_revision; - union dpcd_sink_ext_caps dpcd_sink_ext_caps; - - struct psr_settings psr_settings; - struct replay_settings replay_settings; - - /* Drive settings read from integrated info table */ - struct dc_lane_settings bios_forced_drive_settings; - - /* Vendor specific LTTPR workaround variables */ - uint8_t vendor_specific_lttpr_link_rate_wa; - bool apply_vendor_specific_lttpr_link_rate_wa; - - /* MST record stream using this link */ - struct link_flags { - bool dp_keep_receiver_powered; - bool dp_skip_DID2; - bool dp_skip_reset_segment; - bool dp_skip_fs_144hz; - bool dp_mot_reset_segment; - /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ - bool dpia_mst_dsc_always_on; - /* Forced DPIA into TBT3 compatibility mode. */ - bool dpia_forced_tbt3_mode; - bool dongle_mode_timing_override; - bool blank_stream_on_ocs_change; - bool read_dpcd204h_on_irq_hpd; - bool force_dp_ffe_preset; - } wa_flags; - union dc_dp_ffe_preset forced_dp_ffe_preset; - struct link_mst_stream_allocation_table mst_stream_alloc_table; - - struct dc_link_status link_status; - struct dprx_states dprx_states; - - struct gpio *hpd_gpio; - enum dc_link_fec_state fec_state; - bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly - - struct dc_panel_config panel_config; - struct phy_state phy_state; - uint32_t phy_transition_bitmask; - // BW ALLOCATON USB4 ONLY - struct dc_dpia_bw_alloc dpia_bw_alloc_config; - bool skip_implict_edp_power_control; - enum backlight_control_type backlight_control_type; -}; - /* Return an enumerated dc_link. * dc_link order is constant and determined at * boot time. They cannot be created or destroyed. @@ -2589,10 +2598,18 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); /* DSC Interfaces */ #include "dc_dsc.h" +void dc_get_visual_confirm_for_stream( + struct dc *dc, + struct dc_stream_state *stream_state, + struct tg_color *color); + /* Disable acc mode Interfaces */ void dc_disable_accelerated_mode(struct dc *dc); bool dc_is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream); +bool dc_is_cursor_limit_pending(struct dc *dc); +bool dc_can_clear_cursor_limit(struct dc *dc); + #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 614e03bfd598..afbcf866520e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -39,6 +39,7 @@ #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger +#define GPINT_RETRY_NUM 20 static void dc_dmub_srv_construct(struct dc_dmub_srv *dc_srv, struct dc *dc, struct dmub_srv *dmub) @@ -70,20 +71,28 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } } -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub = dc_dmub_srv->dmub; - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dmub_srv *dmub; + struct dc_context *dc_ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_pending(dmub, 100000); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); if (status != DMUB_STATUS_OK) { DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); } + + return status == DMUB_STATUS_OK; } void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) @@ -126,7 +135,49 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } -bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, +static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status = DMUB_STATUS_OK; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + /* confirm no messages pending */ + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); + + /* queue command */ + if (status == DMUB_STATUS_OK) + status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); + + /* check for errors */ + if (status != DMUB_STATUS_OK) { + break; + } + } + + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } + return false; + } + + return true; +} + +static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { @@ -143,20 +194,25 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, for (i = 0 ; i < count; i++) { // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + if (!cmd_list[i].cmd_common.header.multi_cmd_pending || + dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); + } else { + status = DMUB_STATUS_QUEUE_FULL; + } if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status == DMUB_STATUS_POWER_STATE_D3) return false; do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_inbox_free(dmub, 100000, count - i); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); } if (status != DMUB_STATUS_OK) { @@ -168,7 +224,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } } - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { if (status != DMUB_STATUS_POWER_STATE_D3) { DC_ERROR("Error starting DMUB execution: status=%d\n", status); @@ -180,6 +236,26 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, return true; } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + bool res = false; + + if (dc_dmub_srv && dc_dmub_srv->dmub) { + if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { + res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } else { + res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } + + if (res) + res = dmub_srv_update_inbox_status(dc_dmub_srv->dmub) == DMUB_STATUS_OK; + } + + return res; +} + bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, enum dm_dmub_wait_type wait_type, union dmub_rb_cmd *cmd_list) @@ -202,7 +278,8 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); if (!dmub->debug.timeout_info.timeout_occured) { dmub->debug.timeout_info.timeout_occured = true; - dmub->debug.timeout_info.timeout_cmd = *cmd_list; + if (cmd_list) + dmub->debug.timeout_info.timeout_cmd = *cmd_list; dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); @@ -210,8 +287,9 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, } // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { + dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); + } } return true; @@ -224,74 +302,10 @@ bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) { - struct dc_context *dc_ctx; - struct dmub_srv *dmub; - enum dmub_status status; - int i; - - if (!dc_dmub_srv || !dc_dmub_srv->dmub) + if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) return false; - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - - for (i = 0 ; i < count; i++) { - // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - - if (status == DMUB_STATUS_QUEUE_FULL) { - /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); - if (status == DMUB_STATUS_POWER_STATE_D3) - return false; - - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) - return false; - - /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - } - - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - } - - status = dmub_srv_cmd_execute(dmub); - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - - // Wait for DMUB to process command - if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { - do { - status = dmub_srv_wait_for_idle(dmub, 100000); - } while (status != DMUB_STATUS_OK); - } else - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) { - DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - return false; - } - - // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); - } - - return true; + return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); } bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) @@ -1243,7 +1257,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); memset(&new_signals, 0, sizeof(new_signals)); @@ -1355,14 +1369,15 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc->debug.optimize_ips_handshake || !ips_fw->signals.bits.ips2_commit) udelay(dc->debug.ips2_eval_delay_us); - if (ips_fw->signals.bits.ips2_commit) { - DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", - ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + DC_LOG_IPS( + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", + ips_fw->signals.bits.ips1_commit, + ips_fw->signals.bits.ips2_commit); - // Tell PMFW to exit low power state - dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); + // Tell PMFW to exit low power state + dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); + + if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", @@ -1400,7 +1415,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); + dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); } } @@ -1654,7 +1669,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + global_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); if (enable) { /* send global configuration parameters */ @@ -1673,11 +1689,13 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* configure command header */ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_base_cmd->header.multi_cmd_pending = 1; stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_sub_state_cmd->header.multi_cmd_pending = 1; /* copy stream static base state */ memcpy(&stream_base_cmd->config, @@ -1723,7 +1741,8 @@ void dc_dmub_srv_fams2_drr_update(struct dc *dc, cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; - cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); + cmd.fams2_drr_update.header.payload_bytes = + sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -1759,7 +1778,8 @@ void dc_dmub_srv_fams2_passthrough_flip( /* build command header */ cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; - cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); + cmds[num_cmds].fams2_flip.header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); /* for chaining multiple commands, all but last command should set to 1 */ cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1; @@ -1869,11 +1889,14 @@ void dc_dmub_srv_ips_query_residency_info(struct dc_dmub_srv *dc_dmub_srv, struc if (command_code == DMUB_GPINT__INVALID_COMMAND) return; - // send gpint commands and wait for ack - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT, - (uint16_t)(output->ips_mode), - &output->residency_percent, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->residency_percent = 0; + for (i = 0; i < GPINT_RETRY_NUM; i++) { + // false could mean GPINT timeout, in which case we should retry + if (dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT, + (uint16_t)(output->ips_mode), &output->residency_percent, + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + break; + udelay(100); + } if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_ENTRY_COUNTER, (uint16_t)(output->ips_mode), diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a636f4c3f01d..ada5c2fb2db3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -58,7 +58,7 @@ struct dc_dmub_srv { bool needs_idle_wake; }; -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 77c87ad57220..0bad8304ccf6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -159,6 +159,11 @@ struct dc_link_settings { uint8_t link_rate_set; }; +struct dc_tunnel_settings { + bool should_enable_dp_tunneling; + bool should_use_dp_bw_allocation; +}; + union dc_dp_ffe_preset { struct { uint8_t level : 4; @@ -943,10 +948,20 @@ union dpia_info { uint8_t raw; }; +/* DPCD[0xE0020] USB4_DRIVER_BW_CAPABILITY register. */ +union usb4_driver_bw_cap { + struct { + uint8_t rsvd :7; + uint8_t driver_bw_alloc_support :1; + } bits; + uint8_t raw; +}; + /* DP Tunneling over USB4 */ struct dpcd_usb4_dp_tunneling_info { union dp_tun_cap_support dp_tun_cap; union dpia_info dpia_info; + union usb4_driver_bw_cap driver_bw_cap; uint8_t usb4_driver_id; uint8_t usb4_topology_id[DPCD_USB4_TOPOLOGY_ID_LEN]; }; @@ -1486,5 +1501,11 @@ struct dp_trace { # ifndef DP_TUNNELING_BW_ALLOC_CAP_CHANGED # define DP_TUNNELING_BW_ALLOC_CAP_CHANGED (1 << 3) # endif +# ifndef DPTX_BW_ALLOC_UNMASK_IRQ +# define DPTX_BW_ALLOC_UNMASK_IRQ (1 << 6) +# endif +# ifndef DPTX_BW_ALLOC_MODE_ENABLE +# define DPTX_BW_ALLOC_MODE_ENABLE (1 << 7) +# endif #endif /* DC_DP_TYPES_H */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.c b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c new file mode 100644 index 000000000000..fee69642fb93 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dc_fused_io.h" + +#include "dm_helpers.h" +#include "gpio.h" + +static bool op_i2c_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_i2c *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line, + bool over_aux +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_i2c *loc = &req->u.i2c; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + loc->is_aux = false; + loc->ddc_line = ddc_line; + loc->over_aux = over_aux; + loc->address = op->address; + loc->offset = op->offset; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool op_aux_convert( + union dmub_rb_cmd *cmd, + const struct mod_hdcp_atomic_op_aux *op, + enum dmub_cmd_fused_request_type type, + uint32_t ddc_line +) +{ + struct dmub_cmd_fused_request *req = &cmd->fused_io.request; + struct dmub_cmd_fused_request_location_aux *loc = &req->u.aux; + + if (!op || op->size > sizeof(req->buffer)) + return false; + + req->type = type; + loc->is_aux = true; + loc->ddc_line = ddc_line; + loc->address = op->address; + loc->length = op->size; + memcpy(req->buffer, op->data, op->size); + + return true; +} + +static bool atomic_write_poll_read( + struct dc_link *link, + union dmub_rb_cmd commands[3], + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + const uint8_t count = 3; + const uint32_t timeout_per_request_us = 10000; + const uint32_t timeout_per_aux_transaction_us = 10000; + uint64_t timeout_us = 0; + + commands[1].fused_io.request.poll_mask_msb = poll_mask_msb; + commands[1].fused_io.request.timeout_us = poll_timeout_us; + + for (uint8_t i = 0; i < count; i++) { + struct dmub_rb_cmd_fused_io *io = &commands[i].fused_io; + + io->header.type = DMUB_CMD__FUSED_IO; + io->header.sub_type = DMUB_CMD__FUSED_IO_EXECUTE; + io->header.multi_cmd_pending = i != count - 1; + io->header.payload_bytes = sizeof(commands[i].fused_io) - sizeof(io->header); + + timeout_us += timeout_per_request_us + io->request.timeout_us; + if (!io->request.timeout_us && io->request.u.aux.is_aux) + timeout_us += timeout_per_aux_transaction_us * (io->request.u.aux.length / 16); + } + + if (!dm_helpers_execute_fused_io(link->ctx, link, commands, count, timeout_us)) + return false; + + return commands[0].fused_io.request.status == FUSED_REQUEST_STATUS_SUCCESS; +} + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + if (!link) + return false; + + const bool over_aux = false; + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_i2c_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line, over_aux) + && op_i2c_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line, over_aux) + && op_i2c_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line, over_aux); + + if (!converted) + return false; + + const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +) +{ + if (!link) + return false; + + const uint32_t ddc_line = link->ddc->ddc_pin->pin_data->en; + union dmub_rb_cmd commands[3] = { 0 }; + const bool converted = op_aux_convert(&commands[0], write, FUSED_REQUEST_WRITE, ddc_line) + && op_aux_convert(&commands[1], poll, FUSED_REQUEST_POLL, ddc_line) + && op_aux_convert(&commands[2], read, FUSED_REQUEST_READ, ddc_line); + + if (!converted) + return false; + + const bool result = atomic_write_poll_read(link, commands, poll_timeout_us, poll_mask_msb); + + memcpy(read->data, commands[0].fused_io.request.buffer, read->size); + return result; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dc_fused_io.h b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h new file mode 100644 index 000000000000..c74917240985 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dc_fused_io.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __DC_FUSED_IO_H__ +#define __DC_FUSED_IO_H__ + +#include "dc.h" +#include "mod_hdcp.h" + +bool dm_atomic_write_poll_read_i2c( + struct dc_link *link, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +bool dm_atomic_write_poll_read_aux( + struct dc_link *link, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb +); + +#endif // __DC_FUSED_IO_H__ + diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 8f077e15b4f0..7217de258851 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -682,13 +682,19 @@ void reg_sequence_wait_done(const struct dc_context *ctx) if (offload && ctx->dc->debug.dmub_offload_enabled && !ctx->dc->debug.dmcub_emulation) { - dc_dmub_srv_wait_idle(ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); } } char *dce_version_to_string(const int version) { switch (version) { + case DCE_VERSION_6_0: + return "DCE 6.0"; + case DCE_VERSION_6_1: + return "DCE 6.1"; + case DCE_VERSION_6_4: + return "DCE 6.4"; case DCE_VERSION_8_0: return "DCE 8.0"; case DCE_VERSION_8_1: diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h index e9413685ed4f..14feb843e694 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_plane.h +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -28,13 +28,24 @@ #include "dc_hw_types.h" +union dc_plane_status_update_flags { + struct { + uint32_t address : 1; + } bits; + uint32_t raw; +}; + struct dc_plane_state *dc_create_plane_state(const struct dc *dc); const struct dc_plane_status *dc_plane_get_status( - const struct dc_plane_state *plane_state); + const struct dc_plane_state *plane_state, + union dc_plane_status_update_flags flags); void dc_plane_state_retain(struct dc_plane_state *plane_state); void dc_plane_state_release(struct dc_plane_state *plane_state); void dc_plane_force_dcc_and_tiling_disable(struct dc_plane_state *plane_state, bool clear_tiling); + +void dc_plane_copy_config(struct dc_plane_state *dst, const struct dc_plane_state *src); + #endif /* _DC_PLANE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h index 1a12ef579ff4..1d9bae56ff6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_state_priv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_state_priv.h @@ -105,4 +105,24 @@ bool dc_state_is_fams2_in_use( const struct dc *dc, const struct dc_state *state); + +void dc_state_set_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit); + +bool dc_state_get_stream_subvp_cursor_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +void dc_state_set_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state, + bool limit); + +bool dc_state_get_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_can_clear_stream_cursor_subvp_limit(const struct dc_stream_state *stream, + struct dc_state *state); + +bool dc_state_is_subvp_in_use(struct dc_state *state); + #endif /* _DC_STATE_PRIV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e0bfddaa23e3..341d2ffb64b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -44,6 +44,8 @@ struct mall_stream_config { */ enum mall_stream_type type; struct dc_stream_state *paired_stream; // master / slave stream + bool subvp_limit_cursor_size; /* stream has/is using subvp limiting hw cursor support */ + bool cursor_size_limit_subvp; /* stream is using hw cursor config preventing subvp */ }; struct dc_stream_status { @@ -503,6 +505,11 @@ void program_cursor_position( struct dc *dc, struct dc_stream_state *stream); +bool dc_stream_check_cursor_attributes( + const struct dc_stream_state *stream, + struct dc_state *state, + const struct dc_cursor_attributes *attributes); + bool dc_stream_set_cursor_attributes( struct dc_stream_state *stream, const struct dc_cursor_attributes *attributes); @@ -579,4 +586,8 @@ void dc_dmub_update_dirty_rect(struct dc *dc, struct dc_stream_state *stream, struct dc_surface_update *srf_updates, struct dc_state *context); + +bool dc_stream_is_cursor_limit_pending(struct dc *dc, struct dc_stream_state *stream); +bool dc_stream_can_clear_cursor_limit(struct dc *dc, struct dc_stream_state *stream); + #endif /* DC_STREAM_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 83ffaae9f439..a4cd0eb39a3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -210,6 +210,7 @@ struct dc_edid_caps { bool edid_hdmi; bool hdr_supported; + bool rr_capable; struct dc_panel_patch panel_patch; }; @@ -1089,7 +1090,8 @@ union replay_low_refresh_rate_enable_options { struct { //BIT[0-3]: Replay Low Hz Support control unsigned int ENABLE_LOW_RR_SUPPORT :1; - unsigned int RESERVED_1_3 :3; + unsigned int SKIP_ASIC_CHECK :1; + unsigned int RESERVED_2_3 :2; //BIT[4-15]: Replay Low Hz Enable Scenarios unsigned int ENABLE_STATIC_SCREEN :1; unsigned int ENABLE_FULL_SCREEN_VIDEO :1; @@ -1129,6 +1131,10 @@ struct replay_config { union replay_low_refresh_rate_enable_options low_rr_enable_options; /* Replay coasting vtotal is within low refresh rate range. */ bool low_rr_activated; + /* Replay low refresh rate supported*/ + bool low_rr_supported; + /* Replay Video Conferencing Optimization Enabled */ + bool replay_video_conferencing_optimization_enabled; }; /* Replay feature flags*/ @@ -1249,6 +1255,7 @@ enum dc_cm2_gpu_mem_layout { enum dc_cm2_gpu_mem_pixel_component_order { DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA, + DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA }; enum dc_cm2_gpu_mem_format { @@ -1270,7 +1277,8 @@ struct dc_cm2_gpu_mem_format_parameters { enum dc_cm2_gpu_mem_size { DC_CM2_GPU_MEM_SIZE_171717, - DC_CM2_GPU_MEM_SIZE_TRANSFORMED + DC_CM2_GPU_MEM_SIZE_333333, + DC_CM2_GPU_MEM_SIZE_TRANSFORMED, }; struct dc_cm2_gpu_mem_parameters { @@ -1279,6 +1287,7 @@ struct dc_cm2_gpu_mem_parameters { struct dc_cm2_gpu_mem_format_parameters format_params; enum dc_cm2_gpu_mem_pixel_component_order component_order; enum dc_cm2_gpu_mem_size size; + uint16_t bit_depth; }; enum dc_cm2_transfer_func_source { @@ -1302,6 +1311,10 @@ struct dc_cm2_func_luts { const struct dc_3dlut *lut3d_func; struct dc_cm2_gpu_mem_parameters gpu_mem_params; }; + bool rmcm_3dlut_shaper_select; + bool mpc_3dlut_enable; + bool rmcm_3dlut_enable; + bool mpc_mcm_post_blend; } lut3d_data; const struct dc_transfer_func *lut1d_func; }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index b363f5360818..58c84f555c0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -391,6 +391,7 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; @@ -411,6 +412,8 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, BREAK_TO_DEBUGGER(); break; } + //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 0 : 1); + } static void dccg35_set_dpstreamclk_rcg( @@ -1035,6 +1038,7 @@ static void dccg35_enable_dpp_clk_new( DPPCLK0_DTO_MODULO, 0xFF); } + static void dccg35_disable_dpp_clk_new( struct dccg *dccg, int inst) @@ -1112,30 +1116,24 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + switch (dpp_inst) { case 0: REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); break; case 1: REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable); break; case 2: REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable); break; case 3: REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, enable); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable); break; default: break; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); } @@ -1163,14 +1161,18 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, ASSERT(false); phase = 0xff; } + dccg35_set_dppclk_rcg(dccg, dpp_inst, false); REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, DPPCLK0_DTO_PHASE, phase, DPPCLK0_DTO_MODULO, modulo); dcn35_set_dppclk_enable(dccg, dpp_inst, true); - } else + } else { dcn35_set_dppclk_enable(dccg, dpp_inst, false); + /*we have this in hwss: disable_plane*/ + //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); + } dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; } @@ -1182,6 +1184,7 @@ static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) return; + switch (dpp_inst) { case 0: REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); @@ -1198,6 +1201,8 @@ static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, default: break; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable); + } static void dccg35_get_pixel_rate_div( @@ -1521,28 +1526,30 @@ static void dccg35_set_physymclk_root_clock_gating( switch (phy_inst) { case 0: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 3: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, - PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 1 : 0); + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); return; } + //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 0 : 1); + } static void dccg35_set_physymclk( @@ -1643,6 +1650,8 @@ static void dccg35_dpp_root_clock_control( return; if (clock_on) { + dccg35_set_dppclk_rcg(dccg, dpp_inst, false); + /* turn off the DTO and leave phase/modulo at max */ dcn35_set_dppclk_enable(dccg, dpp_inst, 1); REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, @@ -1654,6 +1663,8 @@ static void dccg35_dpp_root_clock_control( REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, DPPCLK0_DTO_PHASE, 0, DPPCLK0_DTO_MODULO, 1); + /*we have this in hwss: disable_plane*/ + //dccg35_set_dppclk_rcg(dccg, dpp_inst, true); } dccg->dpp_clock_gated[dpp_inst] = !clock_on; @@ -1771,36 +1782,40 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) //Disable DTO switch (inst) { case 0: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, 0, DSCCLK0_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); break; case 1: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, 0, DSCCLK1_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); break; case 2: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, 0, DSCCLK2_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); break; case 3: + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, 0, DSCCLK3_DTO_MODULO, 0); REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); break; default: BREAK_TO_DEBUGGER(); @@ -1813,9 +1828,6 @@ static void dccg35_disable_dscclk(struct dccg *dccg, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - return; - switch (inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 077337698e0a..b4f5b4a6331a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -976,11 +976,12 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; - // Apply ssed(spread spectrum) dpref clock for edp only. - if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 - && pix_clk_params->signal_type == SIGNAL_TYPE_EDP - && encoding == DP_8b_10b_ENCODING) + // Apply ssed(spread spectrum) dpref clock for edp and dp + if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0 && + dc_is_dp_signal(pix_clk_params->signal_type) && + encoding == DP_8b_10b_ENCODING) dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz; + // For these signal types Driver to program DP_DTO without calling VBIOS Command table if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h index 0721ae895ae9..94128f7a18b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.h @@ -257,7 +257,7 @@ bool dce110_clk_src_construct( struct dce110_clk_src *clk_src, struct dc_context *ctx, struct dc_bios *bios, - enum clock_source_id, + enum clock_source_id id, const struct dce110_clk_src_regs *regs, const struct dce110_clk_src_shift *cs_shift, const struct dce110_clk_src_mask *cs_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index ccc154b0281c..3b9011ef9b68 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -28,6 +28,8 @@ #include "dc.h" #include "core_types.h" #include "dmub_cmd.h" +#include "dc_dmub_srv.h" +#include "dmub/dmub_srv.h" #define TO_DMUB_ABM(abm)\ container_of(abm, struct dce_abm, base) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 0d7e7f3b81a1..a641ae04450c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -240,7 +240,8 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; - cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); + cmd.abm_save_restore.header.payload_bytes = + sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index c31e4f26a305..fcd3d86ad517 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -280,7 +280,9 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm memset(&cmd, 0, sizeof(cmd)); pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; - pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + pCmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - + sizeof(struct dmub_cmd_header); pCmd->replay_set_power_opt_data.power_opt = power_opt; pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); @@ -319,7 +321,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_timing_sync.header.sub_type = DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; cmd.replay_set_timing_sync.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = cmd_element->sync_data.panel_inst; @@ -331,7 +334,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_frameupdate_timer.header.sub_type = DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; cmd.replay_set_frameupdate_timer.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_frameupdate_timer.data.panel_inst = cmd_element->panel_inst; @@ -345,7 +349,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_pseudo_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; cmd.replay_set_pseudo_vtotal.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_pseudo_vtotal.data.panel_inst = cmd_element->pseudo_vtotal_data.panel_inst; @@ -357,7 +362,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; @@ -369,7 +375,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_general_cmd.header.sub_type = DMUB_CMD__REPLAY_SET_GENERAL_CMD; cmd.replay_set_general_cmd.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_general_cmd); + sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_general_cmd.data.panel_inst = cmd_element->set_general_cmd_data.panel_inst; diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile index eede83ad91fa..824f73eb3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile @@ -25,8 +25,7 @@ CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init -DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \ - dce60_resource.o +DCE60 = dce60_timing_generator.o AMD_DAL_DCE60 = $(addprefix $(AMDDALPATH)/dc/dce60/,$(DCE60)) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 003a9330c286..88e7a1fc9a30 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -105,7 +105,7 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) dm_write_reg(tg->ctx, addr, value); } -static void program_timing(struct timing_generator *tg, +static void dce80_timing_generator_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, int vready_offset, int vstartup_start, @@ -185,7 +185,7 @@ static void dce80_timing_generator_enable_advanced_request( static const struct timing_generator_funcs dce80_tg_funcs = { .validate_timing = dce110_tg_validate_timing, - .program_timing = program_timing, + .program_timing = dce80_timing_generator_program_timing, .enable_crtc = dce110_timing_generator_enable_crtc, .disable_crtc = dce110_timing_generator_disable_crtc, .is_counter_moving = dce110_timing_generator_is_counter_moving, diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 5efddd48d5c5..9d160b39e8c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -153,6 +153,14 @@ bool dm_helpers_submit_i2c( const struct dc_link *link, struct i2c_command *cmd); +bool dm_helpers_execute_fused_io( + struct dc_context *ctx, + struct dc_link *link, + union dmub_rb_cmd *commands, + uint8_t count, + uint32_t timeout_us +); + bool dm_helpers_dp_write_dsc_enable( struct dc_context *ctx, const struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index f1fe49401bc0..8d24763938ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -1002,6 +1002,7 @@ static bool CalculatePrefetchSchedule( dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC); Tsw_oto = Lsw_oto * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index f567a9023682..ed59c77bc6f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1105,6 +1105,7 @@ static bool CalculatePrefetchSchedule( Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 5865e8fa2d8e..9f3938a50240 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -1123,6 +1123,7 @@ static bool CalculatePrefetchSchedule( Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 56dda686e299..b0fc1fd20208 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -627,6 +627,7 @@ static bool dcn32_assign_subvp_pipe(struct dc *dc, */ if (pipe->plane_state && !pipe->top_pipe && !pipe->prev_odm_pipe && !dcn32_is_center_timing(pipe) && !pipe->stream->hw_cursor_req && + !dc_state_get_stream_cursor_subvp_limit(pipe->stream, context) && !(pipe->stream->timing.pix_clk_100hz / 10000 > DCN3_2_MAX_SUBVP_PIXEL_RATE_MHZ) && (!dcn32_is_psr_capable(pipe) || (context->stream_count == 1 && dc->caps.dmub_caps.subvp_psr)) && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_NONE && diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 21fd466dba26..157ecf008d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -99,7 +99,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math. CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) @@ -117,11 +116,9 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_floa CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags) DML21 := src/dml2_top/dml2_top_interfaces.o DML21 += src/dml2_top/dml2_top_soc15.o -DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 731fbd4bc600..d47cacfdb695 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -526,7 +526,8 @@ static void populate_dml21_output_config_from_stream_state(struct dml2_link_outp static void populate_dml21_stream_overrides_from_stream_state( struct dml2_stream_parameters *stream_desc, - struct dc_stream_state *stream) + struct dc_stream_state *stream, + struct dc_stream_status *stream_status) { switch (stream->debug.force_odm_combine_segments) { case 0: @@ -551,7 +552,9 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || + stream->hw_cursor_req || + stream_status->mall_stream_config.cursor_size_limit_subvp; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) @@ -885,6 +888,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; break; + case DC_CM2_GPU_MEM_SIZE_333333: + plane->tdlut.tdlut_width_mode = dml2_tdlut_width_33_cube; + break; case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; @@ -946,7 +952,7 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; @@ -1023,7 +1029,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); - populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]); + populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.fclk_pstate = dml2_twait_budgeting_setting_if_needed; dml_dispcfg->stream_descriptors[disp_cfg_stream_location].overrides.hw.twait_budgeting.uclk_pstate = dml2_twait_budgeting_setting_if_needed; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 069b939c672a..73a013be1e48 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -11,6 +11,7 @@ struct dc_state; struct dcn_watermarks; union dcn_watermark_set; struct pipe_ctx; +struct dc_plane_state; struct dml2_context; struct dml2_configuration_options; @@ -25,4 +26,5 @@ void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_se void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); +unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index ed6584535e89..208d3651b6ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -12,6 +12,8 @@ #include "dml21_translation_helper.h" #include "dml2_dc_resource_mgmt.h" +#define INVALID -1 + static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { *dml_ctx = vzalloc(sizeof(struct dml2_context)); @@ -208,10 +210,40 @@ static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_sta } } +static void dml21_prepare_mcache_params(struct dml2_context *dml_ctx, struct dc_state *context, struct dc_mcache_params *mcache_params) +{ + int dc_plane_idx = 0; + int dml_prog_idx, stream_idx, plane_idx; + struct dml2_per_plane_programming *pln_prog = NULL; + + for (stream_idx = 0; stream_idx < context->stream_count; stream_idx++) { + for (plane_idx = 0; plane_idx < context->stream_status[stream_idx].plane_count; plane_idx++) { + dml_prog_idx = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_idx]->stream_id, context->stream_status[stream_idx].plane_states[plane_idx], context); + if (dml_prog_idx == INVALID) { + continue; + } + pln_prog = &dml_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; + mcache_params[dc_plane_idx].valid = pln_prog->mcache_allocation.valid; + mcache_params[dc_plane_idx].num_mcaches_plane0 = pln_prog->mcache_allocation.num_mcaches_plane0; + mcache_params[dc_plane_idx].num_mcaches_plane1 = pln_prog->mcache_allocation.num_mcaches_plane1; + mcache_params[dc_plane_idx].requires_dedicated_mall_mcache = pln_prog->mcache_allocation.requires_dedicated_mall_mcache; + mcache_params[dc_plane_idx].last_slice_sharing.plane0_plane1 = pln_prog->mcache_allocation.last_slice_sharing.plane0_plane1; + memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane0, + pln_prog->mcache_allocation.mcache_x_offsets_plane0, + sizeof(int) * (DML2_MAX_MCACHES + 1)); + memcpy(mcache_params[dc_plane_idx].mcache_x_offsets_plane1, + pln_prog->mcache_allocation.mcache_x_offsets_plane1, + sizeof(int) * (DML2_MAX_MCACHES + 1)); + dc_plane_idx++; + } + } +} + static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx) { bool result = false; struct dml2_build_mode_programming_in_out *mode_programming = &dml_ctx->v21.mode_programming; + struct dc_mcache_params mcache_params[MAX_PLANES] = {0}; memset(&dml_ctx->v21.display_config, 0, sizeof(struct dml2_display_cfg)); memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); @@ -246,6 +278,14 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s dml2_map_dc_pipes(dml_ctx, context, NULL, &dml_ctx->v21.dml_to_dc_pipe_mapping, in_dc->current_state); /* if subvp phantoms are present, expand them into dc context */ dml21_handle_phantom_streams_planes(in_dc, context, dml_ctx); + + if (in_dc->res_pool->funcs->program_mcache_pipe_config) { + //Prepare mcache params for each plane based on mcache output from DML + dml21_prepare_mcache_params(dml_ctx, context, mcache_params); + + //populate mcache regs to each pipe + dml_ctx->config.callbacks.allocate_mcache(context, mcache_params); + } } /* Copy DML CLK, WM and REG outputs to bandwidth context */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h index b2075b8c363b..42e715024bc9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h @@ -8,6 +8,7 @@ #include "os_types.h" #include "dml_top_soc_parameter_types.h" +#include "dml_top_display_cfg_types.h" struct dc; struct dc_state; @@ -65,4 +66,67 @@ struct socbb_ip_params_external { struct dml2_ip_capabilities ip_params; struct dml2_soc_bb soc_bb; }; + +/*mcache parameters decided by dml*/ +struct dc_mcache_params { + bool valid; + /* + * For iMALL, dedicated mall mcaches are required (sharing of last + * slice possible), for legacy phantom or phantom without return + * the only mall mcaches need to be valid. + */ + bool requires_dedicated_mall_mcache; + unsigned int num_mcaches_plane0; + unsigned int num_mcaches_plane1; + /* + * Generally, plane0/1 slices must use a disjoint set of caches + * but in some cases the final segement of the two planes can + * use the same cache. If plane0_plane1 is set, then this is + * allowed. + * + * Similarly, the caches allocated to MALL prefetcher are generally + * disjoint, but if mall_prefetch is set, then the final segment + * between the main and the mall pixel requestor can use the same + * cache. + * + * Note that both bits may be set at the same time. + */ + struct { + bool mall_comb_mcache_p0; + bool mall_comb_mcache_p1; + bool plane0_plane1; + } last_slice_sharing; + /* + * A plane is divided into vertical slices of mcaches, + * which wrap on the surface width. + * + * For example, if the surface width is 7680, and split into + * three slices of equal width, the boundary array would contain + * [2560, 5120, 7680] + * + * The assignments are + * 0 = [0 .. 2559] + * 1 = [2560 .. 5119] + * 2 = [5120 .. 7679] + * 0 = [7680 .. INF] + * The final element implicitly is the same as the first, and + * at first seems invalid since it is never referenced (since) + * it is outside the surface. However, its useful when shifting + * (see below). + * + * For any given valid mcache assignment, a shifted version, wrapped + * on the surface width boundary is also assumed to be valid. + * + * For example, shifting [2560, 5120, 7680] by -50 results in + * [2510, 5170, 7630]. + * + * The assignments are now: + * 0 = [0 .. 2509] + * 1 = [2510 .. 5169] + * 2 = [5170 .. 7629] + * 0 = [7630 .. INF] + */ + int mcache_x_offsets_plane0[DML2_MAX_MCACHES + 1]; + int mcache_x_offsets_plane1[DML2_MAX_MCACHES + 1]; +}; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a64ec4dcf11a..c047d56527c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -43,4 +43,5 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o */ bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 25b607e7b726..84c90050668c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -156,6 +156,8 @@ struct dml2_dchub_watermark_regs { uint32_t urgent; uint32_t sr_enter; uint32_t sr_exit; + uint32_t sr_enter_z8; + uint32_t sr_exit_z8; uint32_t uclk_pstate; uint32_t fclk_pstate; uint32_t temp_read_or_ppt; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index 5e1ab6d97640..255f05de362c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -166,7 +166,7 @@ struct dml2_surface_cfg { enum dml2_swizzle_mode tiling; struct { - unsigned long pitch; + unsigned long pitch; // In elements, two pixels per element in 422 packed format unsigned long width; unsigned long height; } plane0; @@ -385,6 +385,7 @@ struct dml2_plane_parameters { long reserved_vblank_time_ns; unsigned int max_vactive_det_fill_delay_us; // 0 = no reserved time, +ve = explicit max delay unsigned int gpuvm_min_page_size_kbytes; + unsigned int hostvm_min_page_size_kbytes; enum dml2_svp_mode_override legacy_svp_config; //TODO remove in favor of svp_config diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index bb863c8c6b39..6ee37386f672 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -456,10 +456,10 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) in_out->mode_support_result.global.active.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.average_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = (unsigned long)math_ceil2((l->mode_support_ex_params.out_evaluation_info->urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] * 1000), 1.0); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); - dml2_printf("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_sdp_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.active.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.active.urgent_bw_dram_kbps); + DML_LOG_VERBOSE("DML::%s: in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps = %ld\n", __func__, in_out->mode_support_result.global.svp_prefetch.urgent_bw_dram_kbps); for (i = 0; i < l->svp_expanded_display_cfg.num_planes; i++) { in_out->mode_support_result.per_plane[i].dppclk_khz = (unsigned int)(core->clean_me_up.mode_lib.ms.RequiredDPPCLK[i] * 1000); @@ -509,7 +509,7 @@ bool core_dcn4_mode_support(struct dml2_core_mode_support_in_out *in_out) stream_index = l->svp_expanded_display_cfg.plane_descriptors[i].stream_index; in_out->mode_support_result.per_stream[stream_index].dscclk_khz = (unsigned int)core->clean_me_up.mode_lib.ms.required_dscclk_freq_mhz[i] * 1000; - dml2_printf("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); + DML_LOG_VERBOSE("CORE_DCN4::%s: i=%d stream_index=%d, in_out->mode_support_result.per_stream[stream_index].dscclk_khz = %u\n", __func__, i, stream_index, in_out->mode_support_result.per_stream[stream_index].dscclk_khz); if (!((stream_bitmask >> stream_index) & 0x1)) { in_out->mode_support_result.cfg_support_info.stream_support_info[stream_index].odms_used = odm_count; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 4c504cb0e1c5..c4dad7164d31 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -54,104 +54,104 @@ static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) @@ -179,11 +179,9 @@ static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg } else { out_bpp[k] = 0; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); } } @@ -212,9 +210,7 @@ static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const stru num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); -#endif + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); return num_active_pipes; } @@ -251,7 +247,7 @@ static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]); - dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); + DML_LOG_VERBOSE("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom); return is_phantom; } @@ -415,19 +411,17 @@ static void CalculateMaxDETAndMinCompressedBufferSize( *nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte)); *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); - dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte); if (nomDETInKByteOverrideEnable) { *nomDETInKByte = nomDETInKByteOverrideValue; - dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte); } } @@ -502,7 +496,7 @@ static bool dml_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -535,7 +529,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; } } @@ -570,8 +564,8 @@ static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) { version = 11; } else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -645,21 +639,19 @@ static void CalculateBytePerPixelAndBlockSizes( *BytePerPixelY = 2; *BytePerPixelC = 4; } else { - dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat); + DML_ASSERT(0); } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); - dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); - dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); - dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y); - dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c); - dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); - dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); -#endif + DML_LOG_VERBOSE("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: pitch_y = %u\n", __func__, pitch_y); + DML_LOG_VERBOSE("DML::%s: pitch_c = %u\n", __func__, pitch_c); + DML_LOG_VERBOSE("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l); + DML_LOG_VERBOSE("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c); if (dml_get_gfx_version(SurfaceTiling) == 11) { *surf_linear128_l = 0; @@ -703,12 +695,10 @@ static void CalculateBytePerPixelAndBlockSizes( *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); - dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); - dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); - dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); -#endif + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY); + DML_LOG_VERBOSE("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC); + DML_LOG_VERBOSE("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC); if (dml_get_gfx_version(SurfaceTiling) == 11) { if (SurfaceTiling == dml2_gfx11_sw_linear) { @@ -752,8 +742,8 @@ static void CalculateBytePerPixelAndBlockSizes( } else if (SurfaceTiling == dml2_sw_256kb_2d) { macro_tile_scale = 32; } else { - dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling); + DML_ASSERT(0); } *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY; @@ -766,12 +756,10 @@ static void CalculateBytePerPixelAndBlockSizes( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); - dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); - dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); - dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); -#endif + DML_LOG_VERBOSE("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY); + DML_LOG_VERBOSE("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC); + DML_LOG_VERBOSE("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC); } static void CalculateSinglePipeDPPCLKAndSCLThroughput( @@ -860,10 +848,8 @@ static void CalculateSwathWidth( unsigned int surface_width_ub_c; unsigned int surface_height_ub_c; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -872,11 +858,9 @@ static void CalculateSwathWidth( SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight=%lu\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); MainSurfaceODMMode = ODMMode[k]; @@ -899,13 +883,11 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); - dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); - dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); - dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u HActive=%lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active); + DML_LOG_VERBOSE("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]); if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) { SwathWidthC[k] = SwathWidthY[k] / 2; @@ -934,22 +916,20 @@ static void CalculateSwathWidth( surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c); surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); - dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); - dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); - dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); - dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); - dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); - dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); - dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); - dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l); + DML_LOG_VERBOSE("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y); + DML_LOG_VERBOSE("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]); req_per_swath_ub_l[k] = 0; req_per_swath_ub_c[k] = 0; @@ -995,15 +975,12 @@ static void CalculateSwathWidth( } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); -#endif - + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]); } } @@ -1018,13 +995,11 @@ static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsi if (unb_req_force_en) { unb_req_en = unb_req_force_val && unb_req_ok; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); - dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); - dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); - dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); -#endif - return (unb_req_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en); + DML_LOG_VERBOSE("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val); + DML_LOG_VERBOSE("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok); + DML_LOG_VERBOSE("DML::%s: unb_req_en = %u\n", __func__, unb_req_en); + return unb_req_en; } static void CalculateDETBufferSize( @@ -1054,16 +1029,14 @@ static void CalculateDETBufferSize( bool NextPotentialSurfaceToAssignDETPieceFound; bool MinimizeReallocationSuccess = false; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); - dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); - dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); -#endif + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte); // Note: Will use default det size if that fits 2 swaths if (UnboundedRequestEnabled) { @@ -1092,19 +1065,15 @@ static void CalculateDETBufferSize( l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); - dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); - dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET); + DML_LOG_VERBOSE("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET); + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]); if (l->minDET_pipe == 0) { l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe); } if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) { @@ -1117,12 +1086,10 @@ static void CalculateDETBufferSize( l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte); } if (display_cfg->minimize_det_reallocation) { @@ -1194,14 +1161,12 @@ static void CalculateDETBufferSize( l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: --- Before bandwidth adjustment ---\n", __func__); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]); } - dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__); -#endif - dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); + DML_LOG_VERBOSE("DML::%s: --- DET allocation with bandwidth ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth); l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { @@ -1213,10 +1178,8 @@ static void CalculateDETBufferSize( } else { DETPieceAssignedToThisSurfaceAlready[k] = false; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); -#endif + DML_LOG_VERBOSE("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece); } for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) { @@ -1224,22 +1187,18 @@ static void CalculateDETBufferSize( l->NextSurfaceToAssignDETPiece = 0; for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece); if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound || ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) { l->NextSurfaceToAssignDETPiece = k; NextPotentialSurfaceToAssignDETPieceFound = true; } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); - dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); + DML_LOG_VERBOSE("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); } if (NextPotentialSurfaceToAssignDETPieceFound) { @@ -1249,20 +1208,16 @@ static void CalculateDETBufferSize( * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte, math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); - dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); - dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); - dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); - dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); - dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]); + DML_LOG_VERBOSE("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece); + DML_LOG_VERBOSE("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte); + DML_LOG_VERBOSE("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); -#endif + DML_LOG_VERBOSE("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]); l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte; DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true; @@ -1274,13 +1229,11 @@ static void CalculateDETBufferSize( } *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: --- After bandwidth adjustment ---\n", __func__); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte); for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); } -#endif } static double CalculateRequiredDispclk( @@ -1510,15 +1463,13 @@ static unsigned int dscceComputeDelay( //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format) pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: bpc: %u\n", __func__, bpc); - dml2_printf("DML::%s: BPP: %f\n", __func__, BPP); - dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); - dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices); - dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Output: %u\n", __func__, Output); - dml2_printf("DML::%s: pixels: %u\n", __func__, pixels); -#endif + DML_LOG_VERBOSE("DML::%s: bpc: %u\n", __func__, bpc); + DML_LOG_VERBOSE("DML::%s: BPP: %f\n", __func__, BPP); + DML_LOG_VERBOSE("DML::%s: sliceWidth: %u\n", __func__, sliceWidth); + DML_LOG_VERBOSE("DML::%s: numSlices: %u\n", __func__, numSlices); + DML_LOG_VERBOSE("DML::%s: pixelFormat: %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Output: %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: pixels: %u\n", __func__, pixels); return pixels; } @@ -1593,10 +1544,8 @@ static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, e // sft Delay = Delay + 1; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); - dml2_printf("DML::%s: Delay = %u\n", __func__, Delay); -#endif + DML_LOG_VERBOSE("DML::%s: pixelFormat = %u\n", __func__, pixelFormat); + DML_LOG_VERBOSE("DML::%s: Delay = %u\n", __func__, Delay); return Delay; } @@ -1667,10 +1616,8 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ } meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0); -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); - dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); -#endif + DML_LOG_VERBOSE("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch); + DML_LOG_VERBOSE("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes); if (p->GPUVMEnable == true) { double meta_vmpg_bytes = 4.0 * 1024.0; *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64); @@ -1724,25 +1671,23 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); - dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); - dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); - dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); - dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); - dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); - dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); - dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); - dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); - dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); - dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); - dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); - dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); -#endif + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear); + DML_LOG_VERBOSE("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel); + DML_LOG_VERBOSE("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels); + DML_LOG_VERBOSE("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes); + DML_LOG_VERBOSE("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes); + DML_LOG_VERBOSE("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight); + DML_LOG_VERBOSE("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth); + DML_LOG_VERBOSE("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub); + DML_LOG_VERBOSE("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes); + DML_LOG_VERBOSE("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight); + DML_LOG_VERBOSE("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth); + DML_LOG_VERBOSE("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub); if (p->SurfaceTiling == dml2_sw_linear) { *p->PixelPTEReqHeight = 1; @@ -1778,22 +1723,20 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel); if (p->GPUVMEnable == true) { - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n", __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling)); - DML2_ASSERT(0); + DML_ASSERT(0); } } -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); - dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); - dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); - dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); - dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); -#endif + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth); + DML_LOG_VERBOSE("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear); + DML_LOG_VERBOSE("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize); + DML_LOG_VERBOSE("DML::%s: Pitch = %u\n", __func__, p->Pitch); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height); *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub; *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth); @@ -1811,7 +1754,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_height_linear = 128; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub); #endif } else if (!dml_is_vertical_rotation(p->RotationAngle)) { @@ -1825,7 +1768,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub); #endif *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize; @@ -1840,7 +1783,7 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub); #endif } @@ -1852,18 +1795,18 @@ static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_ *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); - dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); - dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); - dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); - dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); - dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); - dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); - dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); - dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame); #endif return vm_bytes; @@ -1894,12 +1837,12 @@ static unsigned int CalculatePrefetchSourceLines( double numLines = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); - dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps); - dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); - dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); - dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); - dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VTaps = %u\n", __func__, VTaps); + DML_LOG_VERBOSE("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart); + DML_LOG_VERBOSE("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart); + DML_LOG_VERBOSE("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary); + DML_LOG_VERBOSE("DML::%s: SwathHeight = %u\n", __func__, SwathHeight); #endif if (ProgressiveToInterlaceUnitInOPP) *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1)); @@ -1934,11 +1877,11 @@ static unsigned int CalculatePrefetchSourceLines( numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); - dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); - dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); - dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); - dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); + DML_LOG_VERBOSE("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot); + DML_LOG_VERBOSE("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill); + DML_LOG_VERBOSE("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath); + DML_LOG_VERBOSE("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath); + DML_LOG_VERBOSE("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); #endif return (unsigned int)(numLines); @@ -2007,8 +1950,8 @@ static void CalculateMALLUseForStaticScreen( if (is_using_mall_for_ss[k]) TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); - dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL); #endif } @@ -2022,7 +1965,7 @@ static void CalculateMALLUseForStaticScreen( (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { CanAddAnotherSurfaceToMALL = true; SurfaceToAddToMALL = k; - dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall); } } if (CanAddAnotherSurfaceToMALL) { @@ -2030,8 +1973,8 @@ static void CalculateMALLUseForStaticScreen( TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); - dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); + DML_LOG_VERBOSE("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL); #endif } } @@ -2203,15 +2146,15 @@ static void CalculateDCCConfiguration( segment_order_vert_contiguous_chroma = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); - dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); - dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); - dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); - dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); - dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); - dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); - dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); + DML_LOG_VERBOSE("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma); + DML_LOG_VERBOSE("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma); #endif if (DCCProgrammingAssumesScanDirectionUnknown == true) { if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { @@ -2301,12 +2244,12 @@ static void CalculateDCCConfiguration( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); - dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); - dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); - dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); - dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); - dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma); + DML_LOG_VERBOSE("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma); + DML_LOG_VERBOSE("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma); #endif } @@ -2326,26 +2269,26 @@ static void calculate_mcache_row_bytes( unsigned int mvmpg_per_mcache; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans); - dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); - dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); - dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); - dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); - dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); - dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); - dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); - dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); - dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); - dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width); - dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height); - dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); - dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); - dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); + DML_LOG_VERBOSE("DML::%s: num_chans = %u\n", __func__, p->num_chans); + DML_LOG_VERBOSE("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes); + DML_LOG_VERBOSE("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary); + DML_LOG_VERBOSE("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode); + DML_LOG_VERBOSE("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x); + DML_LOG_VERBOSE("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y); + DML_LOG_VERBOSE("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width); + DML_LOG_VERBOSE("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height); + DML_LOG_VERBOSE("DML::%s: blk_width = %u\n", __func__, p->blk_width); + DML_LOG_VERBOSE("DML::%s: blk_height = %u\n", __func__, p->blk_height); + DML_LOG_VERBOSE("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width); + DML_LOG_VERBOSE("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height); + DML_LOG_VERBOSE("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes); #endif - DML2_ASSERT(p->mcache_line_size_bytes != 0); - DML2_ASSERT(p->mcache_size_bytes != 0); + DML_ASSERT(p->mcache_line_size_bytes != 0); + DML_ASSERT(p->mcache_size_bytes != 0); *p->mvmpg_width = 0; *p->mvmpg_height = 0; @@ -2370,8 +2313,8 @@ static void calculate_mcache_row_bytes( *p->mvmpg_width = p->vmpg_width; *p->mvmpg_height = p->vmpg_height; } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) { - dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__); + DML_ASSERT(0); } } @@ -2439,25 +2382,25 @@ static void calculate_mcache_row_bytes( *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); - dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); - dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); - dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); - dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); - dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); - dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: blk_bytes = %u\n", __func__, blk_bytes); + DML_LOG_VERBOSE("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel); + DML_LOG_VERBOSE("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub); + DML_LOG_VERBOSE("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub); + DML_LOG_VERBOSE("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width); + DML_LOG_VERBOSE("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); - dml2_printf("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); - dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes); + DML_LOG_VERBOSE("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel); + DML_LOG_VERBOSE("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches); #endif - DML2_ASSERT(*p->num_mcaches > 0); + DML_ASSERT(*p->num_mcaches > 0); } static void calculate_mcache_setting( @@ -2523,7 +2466,7 @@ static void calculate_mcache_setting( l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l; calculate_mcache_row_bytes(scratch, &l->l_p); - DML2_ASSERT(*p->num_mcaches_l > 0); + DML_ASSERT(*p->num_mcaches_l > 0); if (l->is_dual_plane) { l->c_p.num_chans = p->num_chans; @@ -2559,7 +2502,7 @@ static void calculate_mcache_setting( l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c; calculate_mcache_row_bytes(scratch, &l->c_p); - DML2_ASSERT(*p->num_mcaches_c > 0); + DML_ASSERT(*p->num_mcaches_c > 0); } // Sharing for iMALL access @@ -2598,28 +2541,28 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); - dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); - dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); - dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); - dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); - dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); - dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); - dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); - dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); - dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); + DML_LOG_VERBOSE("DML::%s: imall_enable = %u\n", __func__, p->imall_enable); + DML_LOG_VERBOSE("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane); + DML_LOG_VERBOSE("DML::%s: surf_vert = %u\n", __func__, p->surf_vert); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l); + DML_LOG_VERBOSE("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l); + DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l); if (l->is_dual_plane) { - dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); - dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); - dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); - dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); - dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); - dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); - dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); - dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); - dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); - dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); + DML_LOG_VERBOSE("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c); + DML_LOG_VERBOSE("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c); + DML_LOG_VERBOSE("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor); + DML_LOG_VERBOSE("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c); + DML_LOG_VERBOSE("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c); + DML_LOG_VERBOSE("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c); + DML_LOG_VERBOSE("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c); + DML_LOG_VERBOSE("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size); + DML_LOG_VERBOSE("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache); } #endif // calculate split_coordinate @@ -2639,11 +2582,11 @@ static void calculate_mcache_setting( } #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); if (l->is_dual_plane) { for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); } #endif @@ -2660,10 +2603,10 @@ static void calculate_mcache_setting( #ifdef __DML_VBA_DEBUG__ for (n = 0; n < *p->num_mcaches_l; n++) - dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]); for (n = 0; n < *p->num_mcaches_c; n++) - dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); + DML_LOG_VERBOSE("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]); #endif } @@ -2694,8 +2637,8 @@ static void calculate_mall_bw_overhead_factor( mall_prefetch_dram_overhead_factor[k] = 2.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); - dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]); #endif } } @@ -2772,22 +2715,20 @@ static double dml_get_return_bandwidth_available( else // dml2_core_internal_bw_dram return_bw_mbps = derate_dram_bandwidth; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); - dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); - dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); - dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); - dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); - dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); - dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); - dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); - dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); - dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); - dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); -#endif + DML_LOG_VERBOSE("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw); + DML_LOG_VERBOSE("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en); + DML_LOG_VERBOSE("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); + DML_LOG_VERBOSE("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); + DML_LOG_VERBOSE("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + DML_LOG_VERBOSE("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); + DML_LOG_VERBOSE("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth); + DML_LOG_VERBOSE("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor); + DML_LOG_VERBOSE("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor); + DML_LOG_VERBOSE("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor); + DML_LOG_VERBOSE("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps); return return_bw_mbps; } @@ -2807,9 +2748,9 @@ static noinline_for_stack void calculate_bandwidth_available( { unsigned int n, m; - dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); - dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps); // Calculate all the bandwidth availabe for (m = 0; m < dml2_core_internal_soc_state_max; m++) { @@ -2828,8 +2769,8 @@ static noinline_for_stack void calculate_bandwidth_available( #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); - dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]); #endif // urg_bandwidth_available_vm_only is indexed by soc_state @@ -2843,9 +2784,9 @@ static noinline_for_stack void calculate_bandwidth_available( urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); - dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]); #endif } } @@ -2879,13 +2820,13 @@ static void calculate_avg_bandwidth_required( // SysActive and SVP Prefetch AVG bandwidth Check for (k = 0; k < num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: plane %0d\n", __func__, k); - dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); - dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); - dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: plane %0d\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]); + DML_LOG_VERBOSE("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]); #endif sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k]; @@ -2902,10 +2843,10 @@ static void calculate_avg_bandwidth_required( avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); #endif } } @@ -3080,10 +3021,10 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->MaxNumSwathY[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); - dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); - dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]); #endif p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels); p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k]; @@ -3091,8 +3032,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]); #endif if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) { p->PTEBufferSizeNotExceeded[k] = true; @@ -3104,18 +3045,18 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]); #ifdef __DML_VBA_DEBUG__ if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) { - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); - dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]); } #endif } @@ -3146,8 +3087,8 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { p->DCCMetaBufferSizeNotExceeded[k] = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); - dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]); #endif p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) || (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)); @@ -3170,9 +3111,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, p->DCCMetaBufferSizeNotExceeded[k] = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); - dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes); + DML_LOG_VERBOSE("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]); #endif } @@ -3209,20 +3150,20 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, &p->dpte_row_bw[k], &p->meta_row_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); - dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); - dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); - dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); - dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]); #endif } } @@ -3257,19 +3198,19 @@ static double CalculateUrgentLatency( } #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin); } else { - dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); - dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); - dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); - dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent); + DML_LOG_VERBOSE("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference); } - dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency); #endif return urgent_latency; } @@ -3296,18 +3237,18 @@ static double CalculateTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); - dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); - dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock); - dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); - dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin); + DML_LOG_VERBOSE("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us); #endif @@ -3334,14 +3275,14 @@ static double CalculateMetaTripToMemory( #ifdef __DML_VBA_DEBUG__ if (qos_type == dml2_qos_param_type_dcn4x) { - dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); - dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); - dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: qos_type = %d\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); } else { - dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); + DML_LOG_VERBOSE("DML::%s: UrgLatency = %f\n", __func__, UrgLatency); } - dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); + DML_LOG_VERBOSE("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us); #endif @@ -3358,7 +3299,6 @@ static void calculate_cursor_req_attributes( unsigned int *cursor_bytes_per_chunk, unsigned int *cursor_bytes) { - unsigned int cursor_pitch = 0; unsigned int cursor_bytes_per_req = 0; unsigned int cursor_width_bytes = 0; unsigned int cursor_height = 0; @@ -3366,10 +3306,6 @@ static void calculate_cursor_req_attributes( //SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply. //- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B //- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width - if (cursor_bpp == 2) - cursor_pitch = 256; - else - cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1); //The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows. @@ -3409,8 +3345,8 @@ static void calculate_cursor_req_attributes( *cursor_lines_per_chunk = 1; } else { if (cursor_width > 0) { - dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); - DML2_ASSERT(0); + DML_LOG_VERBOSE("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp); + DML_ASSERT(0); } } @@ -3421,15 +3357,15 @@ static void calculate_cursor_req_attributes( cursor_height = cursor_width; *cursor_bytes = *cursor_bytes_per_line * cursor_height; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); - dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width); - dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); - dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); - dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); - dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch); + DML_LOG_VERBOSE("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp); + DML_LOG_VERBOSE("DML::%s: cursor_width = %d\n", __func__, cursor_width); + DML_LOG_VERBOSE("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes); + DML_LOG_VERBOSE("DML::%s: cursor_pitch = %d\n", __func__, cursor_bpp == 2 ? 256 : (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1)); #endif } @@ -3460,13 +3396,13 @@ static void calculate_cursor_urgent_burst_factor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); - dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); - dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); - dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInCursorBuffer = %u\n", __func__, LinesInCursorBuffer); + DML_LOG_VERBOSE("DML::%s: CursorBufferSizeInTime = %f\n", __func__, CursorBufferSizeInTime); + DML_LOG_VERBOSE("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3501,15 +3437,15 @@ static void CalculateUrgentBurstFactor( *UrgentBurstFactorChroma = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); - dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC); - dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); - dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); - dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); - dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: VRatioC = %f\n", __func__, VRatioC); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC); + DML_LOG_VERBOSE("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); #endif - DML2_ASSERT(VRatio > 0); + DML_ASSERT(VRatio > 0); LinesInDETLuma = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; @@ -3534,12 +3470,12 @@ static void CalculateUrgentBurstFactor( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); - dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); - dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); - dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); + DML_LOG_VERBOSE("DML::%s: LinesInDETLuma = %f\n", __func__, LinesInDETLuma); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, DETBufferSizeInTimeLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma); + DML_LOG_VERBOSE("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma); + DML_LOG_VERBOSE("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding); #endif } @@ -3600,10 +3536,10 @@ static void CalculateDCFCLKDeepSleepTdlut( if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) { double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k]; - dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); - dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); - dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); - dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); + DML_LOG_VERBOSE("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk); // increase the deepsleep dcfclk to match the original dispclk throughput rate if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) { @@ -3613,8 +3549,8 @@ static void CalculateDCFCLKDeepSleepTdlut( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); - dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); #endif } @@ -3625,17 +3561,17 @@ static void CalculateDCFCLKDeepSleepTdlut( *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); - dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); - dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__); + DML_LOG_VERBOSE("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); + DML_LOG_VERBOSE("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); } - dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); } static noinline_for_stack void CalculateDCFCLKDeepSleep( @@ -3731,12 +3667,12 @@ static unsigned int CalculateMaxVStartup( else max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom); - dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); - dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us); - dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); - dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); - dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); + DML_LOG_VERBOSE("DML::%s: VBlankNom = %lu\n", __func__, timing->vblank_nom); + DML_LOG_VERBOSE("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us); + DML_LOG_VERBOSE("DML::%s: line_time_us = %f\n", __func__, line_time_us); + DML_LOG_VERBOSE("DML::%s: vblank_actual = %u\n", __func__, vblank_actual); + DML_LOG_VERBOSE("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); + DML_LOG_VERBOSE("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); #endif max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); return max_vstartup_lines; @@ -3761,9 +3697,9 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch const long MAXIMUMCOMPRESSION = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); + DML_LOG_VERBOSE("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]); } #endif CalculateSwathWidth( @@ -3797,15 +3733,15 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]); p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); - dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); - dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); #endif if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) { p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256)); @@ -3848,11 +3784,11 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); - dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); - dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP); + DML_LOG_VERBOSE("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte); + DML_LOG_VERBOSE("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte); #endif *p->ViewportSizeSupport = true; @@ -3860,7 +3796,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); #endif if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) { p->SwathHeightY[k] = MaximumSwathHeightY[k]; @@ -3917,13 +3853,13 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) { *p->ViewportSizeSupport = false; - dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); - dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]); p->ViewportSizeSupportPerSurface[k] = false; } else { p->ViewportSizeSupportPerSurface[k] = true; @@ -3931,35 +3867,35 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch if (p->SwathHeightC[k] == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024; p->DETBufferSizeC[k] = 0; } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2; } else { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k); #endif p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024)); p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); - dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); - dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]); #endif } @@ -3969,12 +3905,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b); #endif *p->hw_debug5 = false; @@ -3989,12 +3925,12 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) *p->hw_debug5 = true; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + DML_LOG_VERBOSE("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + DML_LOG_VERBOSE("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + DML_LOG_VERBOSE("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } #endif @@ -4192,15 +4128,15 @@ static noinline_for_stack void CalculateODMMode( SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse); - dml2_printf("DML::%s: Output = %d\n", __func__, Output); - dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); - dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); - dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); - dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); + DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); + DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %d\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk); + DML_LOG_VERBOSE("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne); + DML_LOG_VERBOSE("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne); #endif if (ODMUse == dml2_odm_mode_auto) DecidedODMMode = DecideODMMode(HActive, @@ -4245,10 +4181,10 @@ static noinline_for_stack void CalculateODMMode( *NumberOfDPP = NumberOfDPPRequired; *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode); - dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); - dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); - dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); + DML_LOG_VERBOSE("DML::%s: ODMMode = %d\n", __func__, *ODMMode); + DML_LOG_VERBOSE("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP); + DML_LOG_VERBOSE("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface); #endif } @@ -4292,17 +4228,17 @@ static noinline_for_stack void CalculateOutputLink( *OutputRate = dml2_core_internal_output_rate_unknown; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); - dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); - dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); - dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); - dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output); - dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); + DML_LOG_VERBOSE("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable); + DML_LOG_VERBOSE("DML::%s: PHYCLK = %f\n", __func__, PHYCLK); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC); + DML_LOG_VERBOSE("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC); + DML_LOG_VERBOSE("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP); + DML_LOG_VERBOSE("DML::%s: Output (encoder) = %u\n", __func__, Output); + DML_LOG_VERBOSE("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate); #endif { if (Output == dml2_hdmi) { @@ -4487,9 +4423,9 @@ static noinline_for_stack void CalculateOutputLink( } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); - dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); - dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp); + DML_LOG_VERBOSE("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC); + DML_LOG_VERBOSE("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC); + DML_LOG_VERBOSE("DML::%s: OutBpp = %f\n", __func__, *OutBpp); #endif } @@ -4571,17 +4507,17 @@ static unsigned int DSCDelayRequirement( DSCDelayRequirement_val = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode); - dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); - dml2_printf("DML::%s: HActive = %u\n", __func__, HActive); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); - dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); - dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); - dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); - dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); + DML_LOG_VERBOSE("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, ODMMode); + DML_LOG_VERBOSE("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); + DML_LOG_VERBOSE("DML::%s: HActive = %u\n", __func__, HActive); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd); + DML_LOG_VERBOSE("DML::%s: OutputFormat = %u\n", __func__, OutputFormat); + DML_LOG_VERBOSE("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent); + DML_LOG_VERBOSE("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices); + DML_LOG_VERBOSE("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val); #endif return DSCDelayRequirement_val; @@ -4654,10 +4590,10 @@ static void CalculateSurfaceSizeInMall( (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); - dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); - dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); + DML_LOG_VERBOSE("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP); + DML_LOG_VERBOSE("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS); + DML_LOG_VERBOSE("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize); #endif } @@ -4674,7 +4610,6 @@ static void calculate_tdlut_setting( unsigned int tdlut_vmpg_per_frame; unsigned int tdlut_pte_req_per_frame; unsigned int tdlut_bytes_per_line; - unsigned int tdlut_delivery_cycles; double tdlut_drain_rate; unsigned int tdlut_mpc_width; unsigned int tdlut_bytes_per_group_simple; @@ -4737,13 +4672,13 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width; *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple; - tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); + //tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1); tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz; } @@ -4756,25 +4691,25 @@ static void calculate_tdlut_setting( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); - dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); - dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); - dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); + DML_LOG_VERBOSE("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame); - dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); - dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); - dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); - dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); - dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); - dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); - dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); - dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); - dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); - dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); - dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); + DML_LOG_VERBOSE("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); + DML_LOG_VERBOSE("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); + DML_LOG_VERBOSE("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); + DML_LOG_VERBOSE("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate); + DML_LOG_VERBOSE("DML::%s: tdlut_delivery_cycles = %u\n", __func__, p->tdlut_addressing_mode == dml2_tdlut_sw_linear ? (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width : (unsigned int)math_ceil2(tdlut_width/2.0, 1)); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver); + DML_LOG_VERBOSE("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub); #endif } @@ -4820,10 +4755,10 @@ static void CalculateTarb( *Tarb = extra_bytes / ReturnBW; *Tarb_prefetch = extra_bytes_prefetch / ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); - dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); - dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); - dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize); + DML_LOG_VERBOSE("DML::%s: extra_bytes = %f\n", __func__, extra_bytes); + DML_LOG_VERBOSE("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch); #endif } @@ -4838,10 +4773,10 @@ static double CalculateTWait( TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip); - dml2_printf("DML::%s: TWait = %f\n", __func__, TWait); + DML_LOG_VERBOSE("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, Ttrip); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, TWait); #endif return TWait; } @@ -4887,20 +4822,20 @@ static void CalculateVUpdateAndDynamicMetadataParameters( *Tdmsks = *Tdmsks / 2; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); - dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank); - dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk); - dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); - dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); - dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired); + DML_LOG_VERBOSE("DML::%s: VBlank = %u\n", __func__, VBlank); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, HTotal); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, Dppclk); + DML_LOG_VERBOSE("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep); + DML_LOG_VERBOSE("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters); + DML_LOG_VERBOSE("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime); - dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); - dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); - dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix); + DML_LOG_VERBOSE("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix); + DML_LOG_VERBOSE("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix); - dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); #endif } @@ -4962,11 +4897,11 @@ static double get_urgent_bandwidth_required( l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k]; bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]); - bool exclude_this_plane = 0; + bool exclude_this_plane = false; // Exclude phantom pipe in bw calculation for non svp prefetch state if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom) - exclude_this_plane = 1; + exclude_this_plane = true; // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip. // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe. @@ -4995,12 +4930,12 @@ static double get_urgent_bandwidth_required( surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); - dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); - dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); - dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); - dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); - dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); #endif } else { surface_required_bw[k] = 0.0; @@ -5009,34 +4944,34 @@ static double get_urgent_bandwidth_required( l->required_bandwidth_mbps += surface_required_bw[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); - dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); - dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); - dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); - dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); - dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); + DML_LOG_VERBOSE("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + DML_LOG_VERBOSE("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur); - dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); - dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); - dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre); + DML_LOG_VERBOSE("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre); - dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); - dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); - dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); - dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); - dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); - dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); - dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); - dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); - dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); - dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane); + DML_LOG_VERBOSE("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane); #endif } @@ -5120,19 +5055,19 @@ static void CalculateExtraLatency( *ExtraLatency_sr = *ExtraLatency_sr + Tarb; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); - dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); - dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); - dml2_printf("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); - dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); - dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); - dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); - dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); - dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); - dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); - dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); - dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); - dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: qos_type=%u\n", __func__, qos_type); + DML_LOG_VERBOSE("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + DML_LOG_VERBOSE("DML::%s: Tex_trips=%f\n", __func__, Tex_trips); + DML_LOG_VERBOSE("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected); + DML_LOG_VERBOSE("DML::%s: FabricClock=%f\n", __func__, FabricClock); + DML_LOG_VERBOSE("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); + DML_LOG_VERBOSE("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); + DML_LOG_VERBOSE("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + DML_LOG_VERBOSE("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); + DML_LOG_VERBOSE("DML::%s: Tarb=%f\n", __func__, Tarb); + DML_LOG_VERBOSE("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch); #endif } @@ -5199,20 +5134,20 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->HostVMDynamicLevelsTrips = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); - dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); - dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); - dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable); + DML_LOG_VERBOSE("DML::%s: mrq_present = %u\n", __func__, p->mrq_present); + DML_LOG_VERBOSE("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); + DML_LOG_VERBOSE("DML::%s: VStartup = %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk); #endif CalculateVUpdateAndDynamicMetadataParameters( p->MaxInterDCNTileRepeaters, @@ -5258,11 +5193,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { *p->NotEnoughTimeForDynamicMetadata = true; - dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); } else { *p->NotEnoughTimeForDynamicMetadata = false; } @@ -5288,21 +5223,21 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); - dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); - dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); - dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); - dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); - dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); - dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); - dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); - dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); - dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled); + DML_LOG_VERBOSE("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles); + DML_LOG_VERBOSE("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock); + DML_LOG_VERBOSE("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk); + DML_LOG_VERBOSE("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles); + DML_LOG_VERBOSE("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk); + DML_LOG_VERBOSE("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay); + DML_LOG_VERBOSE("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode); + DML_LOG_VERBOSE("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); - dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); - dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); - dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); + DML_LOG_VERBOSE("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); + DML_LOG_VERBOSE("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); + DML_LOG_VERBOSE("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + DML_LOG_VERBOSE("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); #endif if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) @@ -5314,17 +5249,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1)); *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal))); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); - dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); + DML_LOG_VERBOSE("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips); #endif if (p->display_cfg->gpuvm_enable) { s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; @@ -5402,7 +5337,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } /* oto prefetch bw should be always be less than total vactive bw */ - //DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + //DML_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; @@ -5421,9 +5356,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); - dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); - dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + DML_LOG_VERBOSE("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); #endif if (p->display_cfg->gpuvm_enable == true) { @@ -5433,9 +5368,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { s->Tvm_oto = s->Tvm_trips_rounded; @@ -5447,9 +5382,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif } else s->Tr0_oto = s->LineTime / 4.0; @@ -5459,11 +5394,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; #ifdef DML_GLOBAL_PREFETCH_CHECK - dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); + DML_LOG_VERBOSE("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); if (p->impacted_dst_y_pre > 0) { - dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); } #endif *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; @@ -5492,72 +5427,71 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); - dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); - dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); - dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); - dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); - dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); - dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); - dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); - dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); - dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); - dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); - dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); - dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); - dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); - dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); + DML_LOG_VERBOSE("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); + DML_LOG_VERBOSE("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem); + DML_LOG_VERBOSE("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); + DML_LOG_VERBOSE("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); + DML_LOG_VERBOSE("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); + DML_LOG_VERBOSE("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); + DML_LOG_VERBOSE("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto); + DML_LOG_VERBOSE("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto); + DML_LOG_VERBOSE("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines); + DML_LOG_VERBOSE("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines); + DML_LOG_VERBOSE("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes); #endif - double Tpre = s->dst_y_prefetch_equ * s->LineTime; s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); - dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); - dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); - dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); - dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); - dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); - dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); - dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); - dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); - dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); - dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); - dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); - dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); - dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); - dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); - dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: LineTime: %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: VStartup: %u\n", __func__, p->VStartup); + DML_LOG_VERBOSE("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime); + DML_LOG_VERBOSE("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup); + DML_LOG_VERBOSE("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc); + DML_LOG_VERBOSE("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf); + DML_LOG_VERBOSE("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec); + DML_LOG_VERBOSE("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks); + DML_LOG_VERBOSE("DML::%s: TWait = %f\n", __func__, p->TWait); + DML_LOG_VERBOSE("DML::%s: TWait_p = %f\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip = %f\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch); + DML_LOG_VERBOSE("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm); + DML_LOG_VERBOSE("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl); + DML_LOG_VERBOSE("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p); + DML_LOG_VERBOSE("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); + DML_LOG_VERBOSE("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); + DML_LOG_VERBOSE("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); + DML_LOG_VERBOSE("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, (s->dst_y_prefetch_equ * s->LineTime), *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); #endif *p->dst_y_per_vm_vblank = 0; @@ -5596,19 +5530,19 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw1 = 0; - dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); - dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); + DML_LOG_VERBOSE("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); + DML_LOG_VERBOSE("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } @@ -5620,10 +5554,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw2 = 0; - dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); - dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); } // prefetch_bw3: 2*R0 + SW @@ -5634,10 +5568,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else s->prefetch_bw3 = 0; - dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); - dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } // prefetch_bw4: SW @@ -5647,17 +5581,17 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw4 = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); - dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); - dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); - dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); - dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); - dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); - dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); - dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); - dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); - dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); + DML_LOG_VERBOSE("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + DML_LOG_VERBOSE("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, s->dst_y_prefetch_equ * s->LineTime, *p->Tpre_rounded, (*p->Tpre_rounded - (s->dst_y_prefetch_equ * s->LineTime))); + DML_LOG_VERBOSE("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); + DML_LOG_VERBOSE("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); + DML_LOG_VERBOSE("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + DML_LOG_VERBOSE("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); + DML_LOG_VERBOSE("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); + DML_LOG_VERBOSE("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); + DML_LOG_VERBOSE("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3); + DML_LOG_VERBOSE("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4); #endif { bool Case1OK = false; @@ -5676,14 +5610,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); - dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); if (s->prefetch_bw1 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; double row_transfer_time = total_row_bytes / s->prefetch_bw1; - dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } @@ -5696,8 +5630,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw2 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; double row_transfer_time = total_row_bytes / s->prefetch_bw2; - dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } @@ -5709,8 +5643,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (s->prefetch_bw3 > 0) { double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; double row_transfer_time = total_row_bytes / s->prefetch_bw3; - dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); - dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + DML_LOG_VERBOSE("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } @@ -5730,10 +5664,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); - dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); - dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK); - dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: Case1OK: %u\n", __func__, Case1OK); + DML_LOG_VERBOSE("DML::%s: Case2OK: %u\n", __func__, Case2OK); + DML_LOG_VERBOSE("DML::%s: Case3OK: %u\n", __func__, Case3OK); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ); #endif if (s->prefetch_bw_equ > 0) { @@ -5753,12 +5687,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { s->Tvm_equ = 0; s->Tr0_equ = 0; - dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__); + DML_LOG_VERBOSE("DML::%s: prefetch_bw_equ equals 0!\n", __func__); } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); - dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); + DML_LOG_VERBOSE("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); + DML_LOG_VERBOSE("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { @@ -5769,7 +5703,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif } else { @@ -5785,7 +5719,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); + DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } @@ -5797,31 +5731,31 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); - dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us); - dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); - dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); - dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); - dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk); + DML_LOG_VERBOSE("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line); + DML_LOG_VERBOSE("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes); + DML_LOG_VERBOSE("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw); #endif - DML2_ASSERT(*p->dst_y_prefetch < 64); + DML_ASSERT(*p->dst_y_prefetch < 64); unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime); if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) { *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData; *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); - dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY); + DML_LOG_VERBOSE("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY); #endif if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) { @@ -5829,13 +5763,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); - dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); - dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY); #endif } @@ -5843,22 +5777,22 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); - dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC); + DML_LOG_VERBOSE("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC); #endif if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) { if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) { *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); - dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); - dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC); + DML_LOG_VERBOSE("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); + DML_LOG_VERBOSE("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC); #endif } @@ -5866,36 +5800,34 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); - dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY); + DML_LOG_VERBOSE("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); - dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); - dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); - dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); - dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); - dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); - dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); - dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); + DML_LOG_VERBOSE("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); + DML_LOG_VERBOSE("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime); + DML_LOG_VERBOSE("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime); + DML_LOG_VERBOSE("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n"); + DML_LOG_VERBOSE("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup); + DML_LOG_VERBOSE("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + DML_LOG_VERBOSE("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + DML_LOG_VERBOSE("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n", - __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5916,18 +5848,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_vm_bw = 0; } else if (*p->dst_y_per_vm_vblank > 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); + DML_LOG_VERBOSE("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5936,14 +5868,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); #endif } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5963,12 +5895,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->prefetch_vmrow_bw = 0; } - dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); - dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); - dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); - dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); - dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + DML_LOG_VERBOSE("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); + DML_LOG_VERBOSE("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); return s->NoTimeToPrefetch; } @@ -6005,7 +5937,7 @@ static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned } } if (max_idx <= 0) { - DML2_ASSERT(max_idx >= 0); + DML_ASSERT(max_idx >= 0); max_idx = this_plane_idx; } @@ -6037,12 +5969,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core // worst case if the rob and cdb is fully hogged s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); - dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); - dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); - dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); - dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); - dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + DML_LOG_VERBOSE("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + DML_LOG_VERBOSE("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); #endif // calculate the return impact from each plane, request is 256B per dcfclk @@ -6063,12 +5995,12 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); - dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); - dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); - dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); #endif if (s->src_swath_bytes_c[i] > 0) { // dual_plane @@ -6079,10 +6011,10 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); - dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); - dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); - dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c); + DML_LOG_VERBOSE("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + DML_LOG_VERBOSE("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); #endif } @@ -6090,9 +6022,9 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); - dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); - dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); + DML_LOG_VERBOSE("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); + DML_LOG_VERBOSE("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); + DML_LOG_VERBOSE("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); #endif // clamping to worst case delay which is one which occupy the full rob+cdb if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) @@ -6109,7 +6041,7 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); + DML_LOG_VERBOSE("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); #endif } @@ -6120,8 +6052,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core *p->recalc_prefetch_schedule = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); - dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); + DML_LOG_VERBOSE("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); #endif } } else { @@ -6131,8 +6063,8 @@ static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); - dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); + DML_LOG_VERBOSE("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); + DML_LOG_VERBOSE("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); #endif return s->prefetch_global_check_passed; @@ -6150,8 +6082,8 @@ static void calculate_peak_bandwidth_required( memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); - dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); + DML_LOG_VERBOSE("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes); #endif for (unsigned int k = 0; k < p->num_active_planes; ++k) { @@ -6347,12 +6279,12 @@ static void calculate_peak_bandwidth_required( p->surface_peak_required_bw[m][n]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]); #endif - DML2_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); + DML_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]); } } } @@ -6414,18 +6346,18 @@ static void check_urgent_bandwidth_support( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); - dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); - dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall); + DML_LOG_VERBOSE("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]); } @@ -6446,14 +6378,14 @@ static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); - dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); - dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); - dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); - dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps); + DML_LOG_VERBOSE("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps); #endif return flip_bw_available_mbps; @@ -6478,28 +6410,28 @@ static void calculate_immediate_flip_bandwidth_support( *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); - dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); - dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); #endif - DML2_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); + DML_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]); } *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram; *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); - dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); - dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); - dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); - dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); + DML_LOG_VERBOSE("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state)); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp); + DML_LOG_VERBOSE("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram); + DML_LOG_VERBOSE("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip); + DML_LOG_VERBOSE("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok); for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) { for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { - dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", + DML_LOG_VERBOSE("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]); } @@ -6549,27 +6481,27 @@ static void CalculateFlipSchedule( l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); - dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); - dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); - dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); - dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime); - dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); - dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); - dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); - dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); - dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); - dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); - dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); - dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); - dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); - dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); - dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio); + DML_LOG_VERBOSE("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + DML_LOG_VERBOSE("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: iflip_enable = %u\n", __func__, iflip_enable); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: LineTime = %f\n", __func__, LineTime); + DML_LOG_VERBOSE("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %f\n", __func__, vm_bytes); + DML_LOG_VERBOSE("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes); + DML_LOG_VERBOSE("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height); + DML_LOG_VERBOSE("DML::%s: meta_row_height = %d\n", __func__, meta_row_height); + DML_LOG_VERBOSE("DML::%s: VRatio = %f\n", __func__, VRatio); #endif if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) { @@ -6596,9 +6528,9 @@ static void CalculateFlipSchedule( l->min_row_time = l->min_row_height * LineTime / VRatio; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: min_row_time = %f\n", __func__, l->min_row_time); #endif - DML2_ASSERT(l->min_row_time > 0); + DML_ASSERT(l->min_row_time > 0); if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time @@ -6619,20 +6551,20 @@ static void CalculateFlipSchedule( l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded), l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); - dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); - dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); - dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); - dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); + DML_LOG_VERBOSE("DML::%s: total row bytes (%f row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); + DML_LOG_VERBOSE("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded)); if (l->lb_flip_bw > 0) { - dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); - dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); - dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); - dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); - dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); + DML_LOG_VERBOSE("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw); + DML_LOG_VERBOSE("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); + DML_LOG_VERBOSE("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + DML_LOG_VERBOSE("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6640,8 +6572,8 @@ static void CalculateFlipSchedule( (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); - dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip); + DML_LOG_VERBOSE("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime)); #endif } @@ -6653,13 +6585,12 @@ static void CalculateFlipSchedule( } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) - double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); - dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); - dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); - dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion); + DML_LOG_VERBOSE("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes); + DML_LOG_VERBOSE("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW); + DML_LOG_VERBOSE("DML::%s: portion of flip bw = %f\n", __func__, (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes); #endif if (l->ImmediateFlipBW == 0) { l->Tvm_flip = 0; @@ -6674,11 +6605,11 @@ static void CalculateFlipSchedule( LineTime / 4.0); } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); + DML_LOG_VERBOSE("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes)); - dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); - dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip); #endif *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0; *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0; @@ -6711,14 +6642,14 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ if (!use_lb_flip_bw) { - dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); - dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); - dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); - dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); - dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); + DML_LOG_VERBOSE("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); + DML_LOG_VERBOSE("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + DML_LOG_VERBOSE("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } - dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); - dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); + DML_LOG_VERBOSE("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); #endif } @@ -6736,7 +6667,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); #endif p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency; @@ -6755,20 +6686,20 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); - dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); - dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); - dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); - dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); - dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); - dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); - dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); - dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); - dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); - dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); + DML_LOG_VERBOSE("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); + DML_LOG_VERBOSE("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time); + DML_LOG_VERBOSE("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime); + DML_LOG_VERBOSE("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); #endif s->TotalActiveWriteback = 0; @@ -6801,11 +6732,11 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); - dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); - dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); - dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); - dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); + DML_LOG_VERBOSE("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark); + DML_LOG_VERBOSE("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark); + DML_LOG_VERBOSE("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired); + DML_LOG_VERBOSE("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency); #endif s->TotalPixelBW = 0.0; @@ -6836,11 +6767,11 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); - dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); - dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); - dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); - dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); + DML_LOG_VERBOSE("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); + DML_LOG_VERBOSE("DML::%s: k=%u, LBBitPerPixel = %f\n", __func__, k, LBBitPerPixel); + DML_LOG_VERBOSE("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps); #endif s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz); @@ -6943,16 +6874,16 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); - dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); - dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); - dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); - dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); - dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); - dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); - dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l); #endif p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l; @@ -6967,10 +6898,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); - dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); - dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); - dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); + DML_LOG_VERBOSE("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); + DML_LOG_VERBOSE("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); + DML_LOG_VERBOSE("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); #endif } } @@ -6992,10 +6923,10 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); - dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); - dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); - dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); + DML_LOG_VERBOSE("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported); + DML_LOG_VERBOSE("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported); + DML_LOG_VERBOSE("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported); + DML_LOG_VERBOSE("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport); #endif } @@ -7141,7 +7072,7 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -7153,32 +7084,30 @@ static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struc break; } } -#if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, index); -#endif + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); return index; } static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) { unsigned int i; - bool clk_entry_found = 0; + bool clk_entry_found = false; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { - clk_entry_found = 1; + clk_entry_found = true; break; } } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } @@ -7218,10 +7147,10 @@ static void calculate_hostvm_inefficiency_factor( if ((*HostVMInefficiencyFactorPrefetch < 4) && (remote_iommu_outstanding_translations < max_outstanding_reqs)) *HostVMInefficiencyFactorPrefetch = 4; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); - dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); - dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); - dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm); + DML_LOG_VERBOSE("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor); + DML_LOG_VERBOSE("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch); #endif } } @@ -7335,30 +7264,659 @@ static void calculate_pstate_keepout_dst_lines( } } +static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_internal_display_mode_lib *mode_lib, + const struct dml2_display_cfg *display_cfg) +{ + struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; + struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; + struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif + struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; + + double min_return_bw_for_latency; + unsigned int k; + + mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; + + calculate_hostvm_inefficiency_factor( + &s->HostVMInefficiencyFactor, + &s->HostVMInefficiencyFactorPrefetch, + + display_cfg->gpuvm_enable, + display_cfg->hostvm_enable, + mode_lib->ip.remote_iommu_outstanding_translations, + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], + mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); + + mode_lib->ms.Total3dlutActive = 0; + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) + mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; + + // Calculate tdlut schedule related terms + calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; + calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; + calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; + calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; + calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; + calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; + calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; + calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); + + // output + calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; + calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; + calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; + calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; + calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; + calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; + + calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); + } + + min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + + CalculateExtraLatency( + display_cfg, + mode_lib->ip.rob_buffer_size_kbytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, + mode_lib->ms.DCFCLK, + mode_lib->ms.FabricClock, + mode_lib->ip.pixel_chunk_size_kbytes, + min_return_bw_for_latency, + mode_lib->ms.num_active_planes, + mode_lib->ms.NoOfDPP, + mode_lib->ms.dpte_group_bytes, + s->tdlut_bytes_per_group, + s->HostVMInefficiencyFactor, + s->HostVMInefficiencyFactorPrefetch, + mode_lib->soc.hostvm_min_page_size_kbytes, + mode_lib->soc.qos_parameters.qos_type, + !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), + mode_lib->soc.max_outstanding_reqs, + mode_lib->ms.support.request_size_bytes_luma, + mode_lib->ms.support.request_size_bytes_chroma, + mode_lib->ip.meta_chunk_size_kbytes, + mode_lib->ip.dchub_arb_to_ret_delay, + mode_lib->ms.TripToMemory, + mode_lib->ip.hostvm_mode, + + // output + &mode_lib->ms.ExtraLatency, + &mode_lib->ms.ExtraLatency_sr, + &mode_lib->ms.ExtraLatencyPrefetch); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { + mode_lib->ms.support.PrefetchSupported = true; + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; + + mode_lib->ms.TWait[k] = CalculateTWait( + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->ms.UrgLatency, + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx) : 0.0); + + myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; + myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; + myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; + myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; + myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; + myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; + myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; + myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; + myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; + myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; + myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; + myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; + myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; + myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; + myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; + myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; + myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; + myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; + myPipe->ODMMode = mode_lib->ms.ODMMode[k]; + myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; + myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; + myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; + myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; + +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); +#endif + CalculatePrefetchSchedule_params->display_cfg = display_cfg; + CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; + CalculatePrefetchSchedule_params->myPipe = myPipe; + CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; + CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; + CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; + CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; + CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; + CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; + CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; + CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; + CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; + CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; + CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; + CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; + CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; + CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; + CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; + CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; + CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; + CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; + CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; + CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; + CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; + CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; + CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; + CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; + CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; + CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; + CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; + CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; + CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; + CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; + CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; + CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; + CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); + CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; + CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; + CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; + CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; + CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; + CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; + + // output + CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; + CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; + CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; + CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; + CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; + CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l + CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; + CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; + CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; + CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; + CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; + CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; + CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; + CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; + CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; + CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; + CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; + CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; + CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; + CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; + CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; + CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; + + mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + + mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); + } // for k num_planes + + CalculateDCFCLKDeepSleepTdlut( + display_cfg, + mode_lib->ms.num_active_planes, + mode_lib->ms.BytePerPixelY, + mode_lib->ms.BytePerPixelC, + mode_lib->ms.SwathWidthY, + mode_lib->ms.SwathWidthC, + mode_lib->ms.NoOfDPP, + mode_lib->ms.PSCL_FACTOR, + mode_lib->ms.PSCL_FACTOR_CHROMA, + mode_lib->ms.RequiredDPPCLK, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, + mode_lib->soc.return_bus_width_bytes, + mode_lib->ms.RequiredDISPCLK, + s->tdlut_bytes_to_deliver, + s->prefetch_swath_time_us, + + /* Output */ + &mode_lib->ms.dcfclk_deepsleep); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.dst_y_prefetch[k] < 2.0 + || mode_lib->ms.LinesForVM[k] >= 32.0 + || mode_lib->ms.LinesForDPTERow[k] >= 16.0 + || mode_lib->ms.NoTimeForPrefetch[k] == true + || s->DSTYAfterScaler[k] > 8) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); + } + } + + mode_lib->ms.support.DynamicMetadataSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { + if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { + mode_lib->ms.support.DynamicMetadataSupported = false; + } + } + + mode_lib->ms.support.VRatioInPrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { + mode_lib->ms.support.VRatioInPrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); + } + } + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok + if (mode_lib->ms.support.PrefetchSupported) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + // Calculate Urgent burst factor for prefetch +#ifdef __DML_VBA_DEBUG__ + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); +#endif + CalculateUrgentBurstFactor( + &display_cfg->plane_descriptors[k], + mode_lib->ms.swath_width_luma_ub[k], + mode_lib->ms.swath_width_chroma_ub[k], + mode_lib->ms.SwathHeightY[k], + mode_lib->ms.SwathHeightC[k], + s->line_times[k], + mode_lib->ms.UrgLatency, + mode_lib->ms.VRatioPreY[k], + mode_lib->ms.VRatioPreC[k], + mode_lib->ms.BytePerPixelInDETY[k], + mode_lib->ms.BytePerPixelInDETC[k], + mode_lib->ms.DETBufferSizeY[k], + mode_lib->ms.DETBufferSizeC[k], + /* Output */ + &mode_lib->ms.UrgentBurstFactorLumaPre[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], + &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + + // Calculate urgent bandwidth required, both urg and non urg peak bandwidth + // assume flip bw is 0 at this point + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + mode_lib->ms.final_flip_bw[k] = 0; + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 0; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + // Check urg peak bandwidth against available urg bw + // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) + check_urgent_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth + &s->dummy_single[1], // double* frac_urg_bandwidth_mall + &mode_lib->ms.support.UrgVactiveBandwidthSupport, + &mode_lib->ms.support.PrefetchBandwidthSupported, + + mode_lib->soc.mall_allocated_for_dcn_mbytes, + mode_lib->ms.support.non_urg_bandwidth_required, + mode_lib->ms.support.urg_vactive_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_required, + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; + DML_LOG_VERBOSE("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { + mode_lib->ms.support.PrefetchSupported = false; + DML_LOG_VERBOSE("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); + } + } + +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); + + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); + + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + + } + + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); + + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; + } + + s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; + s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; + s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; + s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; + s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; + s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; + s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; + s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; + s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; + s->mSOCParameters.USRRetrainingLatency = 0; + s->mSOCParameters.SMNLatency = 0; + s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), mode_lib->ms.state_idx); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, mode_lib->ms.state_idx); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; + + CalculateWatermarks_params->display_cfg = display_cfg; + CalculateWatermarks_params->USRRetrainingRequired = false; + CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; + CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; + CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; + CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; + CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; + CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; + CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; + CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; + CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; + CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; + CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; + CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; + CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; + CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; + CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; + CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; + CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; + CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; + CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; + CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; + CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; + CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; + CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; + CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; + CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; + CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; + CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; + + // Output + CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark + CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; + CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; + CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] + CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] + CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; + CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; + CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported + CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; + CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; + CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; + CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; + + CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); + + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + DML_LOG_VERBOSE("DML::%s: Done prefetch calculation\n", __func__); + +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table; -#if defined(__DML_VBA_DEBUG__) - double old_ReadBandwidthLuma; - double old_ReadBandwidthChroma; -#endif double outstanding_latency_us = 0; - double min_return_bw_for_latency; struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals; - struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params; struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; - struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; -#ifdef DML_GLOBAL_PREFETCH_CHECK - struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; -#endif - struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; - struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params; unsigned int k, m, n; @@ -7374,9 +7932,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000); mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000; mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000; - mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000; + mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dispclk / 1000; mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000; - mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; + mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_ss_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); @@ -7384,25 +7942,25 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: --- START --- \n", __func__); - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); - dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); - dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); - dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); - dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); - dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK); + DML_LOG_VERBOSE("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); + DML_LOG_VERBOSE("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); + DML_LOG_VERBOSE("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); for (k = 0; k < mode_lib->ms.num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -7504,12 +8062,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); #ifdef __DML_VBA_DEBUG__ - old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; - dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); - dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); #endif } @@ -7629,13 +8185,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); - dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); - dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma); + DML_LOG_VERBOSE("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma); } /* Cursor Support Check */ @@ -7672,11 +8228,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) { mode_lib->ms.support.PitchSupport = false; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); - dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); - dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); - dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); - dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u PitchC = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch); + DML_LOG_VERBOSE("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport); #endif } @@ -7708,11 +8264,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); - dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); - dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); - dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); - dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportWidth = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceWidthY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportHeight = %ld\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u SurfaceHeightY = %ld\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); + DML_LOG_VERBOSE("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif } if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { @@ -7894,8 +8450,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC; } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); #endif // ensure the number dsc slices is integer multiple based on ODM mode @@ -7911,9 +8467,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0); #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.DSCSlicesODMModeSupported) { - dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); - dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); - dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices); + DML_LOG_VERBOSE("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]); } #endif } else { @@ -7958,7 +8514,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.MPCCombine[k] = false; mode_lib->ms.NoOfDPP[k] = 1; if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { - dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); + DML_LOG_VERBOSE("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__); } } else { if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) { @@ -7968,7 +8524,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]); #endif } @@ -8138,7 +8694,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout); - if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) { + if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_ss_clocks_khz.dtbclk / 1000)) { mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true; } } else { @@ -8167,7 +8723,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->DSCFormatFactor = 1; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]); #endif if (mode_lib->ms.RequiresDSC[k] == true) { s->PixelClockBackEndFactor = 3.0; @@ -8185,10 +8741,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); - dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); - dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); - dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor); + DML_LOG_VERBOSE("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported); #endif } } @@ -8423,13 +8979,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); - dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); - dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded); #endif /* VActive bytes to fetch for UCLK P-State */ @@ -8502,7 +9058,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - bool cursor_not_enough_urgent_latency_hiding = 0; + bool cursor_not_enough_urgent_latency_hiding = false; if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { calculate_cursor_req_attributes( @@ -8531,9 +9087,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); #endif CalculateUrgentBurstFactor( @@ -8605,7 +9161,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]); #endif /* Immediate Flip and MALL parameters */ @@ -8654,16 +9210,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); - dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); - dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); - dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); - dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); - dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); - dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); - dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod); + DML_LOG_VERBOSE("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz); + DML_LOG_VERBOSE("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); + DML_LOG_VERBOSE("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8703,10 +9258,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); - dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); - dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us); #endif } @@ -8722,8 +9277,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); - dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); + DML_LOG_VERBOSE("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us); #endif } } @@ -8869,7 +9424,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) { mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false; - dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k); } } @@ -8878,639 +9433,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) { mode_lib->ms.support.AvgBandwidthSupport = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); + DML_LOG_VERBOSE("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n)); #endif } } } - /* Prefetch Check */ - { - mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - - calculate_hostvm_inefficiency_factor( - &s->HostVMInefficiencyFactor, - &s->HostVMInefficiencyFactorPrefetch, - - display_cfg->gpuvm_enable, - display_cfg->hostvm_enable, - mode_lib->ip.remote_iommu_outstanding_translations, - mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active], - mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]); - - mode_lib->ms.Total3dlutActive = 0; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut) - mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1; - - // Calculate tdlut schedule related terms - calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK; - calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode; - calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode; - calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size; - calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable; - calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes; - calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag; - calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling); - - // output - calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k]; - calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k]; - calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k]; - calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k]; - calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k]; - calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k]; - - calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); - } - - min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) - s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); - - CalculateExtraLatency( - display_cfg, - mode_lib->ip.rob_buffer_size_kbytes, - mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, - s->ReorderingBytes, - mode_lib->ms.DCFCLK, - mode_lib->ms.FabricClock, - mode_lib->ip.pixel_chunk_size_kbytes, - min_return_bw_for_latency, - mode_lib->ms.num_active_planes, - mode_lib->ms.NoOfDPP, - mode_lib->ms.dpte_group_bytes, - s->tdlut_bytes_per_group, - s->HostVMInefficiencyFactor, - s->HostVMInefficiencyFactorPrefetch, - mode_lib->soc.hostvm_min_page_size_kbytes, - mode_lib->soc.qos_parameters.qos_type, - !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable), - mode_lib->soc.max_outstanding_reqs, - mode_lib->ms.support.request_size_bytes_luma, - mode_lib->ms.support.request_size_bytes_chroma, - mode_lib->ip.meta_chunk_size_kbytes, - mode_lib->ip.dchub_arb_to_ret_delay, - mode_lib->ms.TripToMemory, - mode_lib->ip.hostvm_mode, - - // output - &mode_lib->ms.ExtraLatency, - &mode_lib->ms.ExtraLatency_sr, - &mode_lib->ms.ExtraLatencyPrefetch); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - s->impacted_dst_y_pre[k] = 0; - - s->recalc_prefetch_schedule = 0; - s->recalc_prefetch_done = 0; - do { - mode_lib->ms.support.PrefetchSupported = true; - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; - - s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane0.width, - display_cfg->plane_descriptors[k].composition.viewport.plane0.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, - mode_lib->ms.NoOfDPP[k], - display_cfg->plane_descriptors[k].composition.viewport.plane1.width, - display_cfg->plane_descriptors[k].composition.viewport.plane1.height, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, - display_cfg->plane_descriptors[k].composition.rotation_angle); - - struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - - mode_lib->ms.TWait[k] = CalculateTWait( - display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? - get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); - - myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; - myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; - myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k]; - myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled; - myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps; - myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps; - myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle; - myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored; - myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k]; - myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k]; - myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k]; - myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k]; - myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced; - myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors; - myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active; - myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total; - myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active; - myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable; - myPipe->ODMMode = mode_lib->ms.ODMMode[k]; - myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format; - myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k]; - myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k]; - myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; - -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); - dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]); -#endif - CalculatePrefetchSchedule_params->display_cfg = display_cfg; - CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; - CalculatePrefetchSchedule_params->myPipe = myPipe; - CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k]; - CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter; - CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl; - CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only; - CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor; - CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal; - CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; - CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; - CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; - CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; - CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; - CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required; - CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes; - CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch; - CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc; - CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k]; - CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k]; - CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k]; - CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k]; - CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k]; - CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k]; - CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k]; - CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k]; - CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k]; - CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k]; - CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k]; - CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k]; - CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory; - CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency; - CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut; - CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k]; - CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k]; - CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k]; - CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0); - CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k]; - CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k]; - CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable; - CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; - CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; - CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; - CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; - CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; - - // output - CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; - CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k]; - CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k]; - CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k]; - CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k]; - CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l - CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c - CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; - CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; - CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; - CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; - CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k]; - CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0]; - CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1]; - CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2]; - CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k]; - CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k]; - CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k]; - CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0]; - CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; - CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; - CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; - CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; - CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; - CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; - CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k]; - - mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); - - mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k]; - dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank); - dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank); - } // for k num_planes - - CalculateDCFCLKDeepSleepTdlut( - display_cfg, - mode_lib->ms.num_active_planes, - mode_lib->ms.BytePerPixelY, - mode_lib->ms.BytePerPixelC, - mode_lib->ms.SwathWidthY, - mode_lib->ms.SwathWidthC, - mode_lib->ms.NoOfDPP, - mode_lib->ms.PSCL_FACTOR, - mode_lib->ms.PSCL_FACTOR_CHROMA, - mode_lib->ms.RequiredDPPCLK, - mode_lib->ms.vactive_sw_bw_l, - mode_lib->ms.vactive_sw_bw_c, - mode_lib->soc.return_bus_width_bytes, - mode_lib->ms.RequiredDISPCLK, - s->tdlut_bytes_to_deliver, - s->prefetch_swath_time_us, - - /* Output */ - &mode_lib->ms.dcfclk_deepsleep); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.dst_y_prefetch[k] < 2.0 - || mode_lib->ms.LinesForVM[k] >= 32.0 - || mode_lib->ms.LinesForDPTERow[k] >= 16.0 - || mode_lib->ms.NoTimeForPrefetch[k] == true - || s->DSTYAfterScaler[k] > 8) { - mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); - dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); - dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); - } - } - - mode_lib->ms.support.DynamicMetadataSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) { - mode_lib->ms.support.DynamicMetadataSupported = false; - } - } - - mode_lib->ms.support.VRatioInPrefetchSupported = true; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { - mode_lib->ms.support.VRatioInPrefetchSupported = false; - dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); - } - } - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; - - // By default, do not recalc prefetch schedule - s->recalc_prefetch_schedule = 0; - - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok - if (mode_lib->ms.support.PrefetchSupported) { - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - // Calculate Urgent burst factor for prefetch -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); - dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]); - dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]); -#endif - CalculateUrgentBurstFactor( - &display_cfg->plane_descriptors[k], - mode_lib->ms.swath_width_luma_ub[k], - mode_lib->ms.swath_width_chroma_ub[k], - mode_lib->ms.SwathHeightY[k], - mode_lib->ms.SwathHeightC[k], - s->line_times[k], - mode_lib->ms.UrgLatency, - mode_lib->ms.VRatioPreY[k], - mode_lib->ms.VRatioPreC[k], - mode_lib->ms.BytePerPixelInDETY[k], - mode_lib->ms.BytePerPixelInDETC[k], - mode_lib->ms.DETBufferSizeY[k], - mode_lib->ms.DETBufferSizeC[k], - /* Output */ - &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChromaPre[k], - &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - - // Calculate urgent bandwidth required, both urg and non urg peak bandwidth - // assume flip bw is 0 at this point - for (k = 0; k < mode_lib->ms.num_active_planes; k++) - mode_lib->ms.final_flip_bw[k] = 0; - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 0; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - // Check urg peak bandwidth against available urg bw - // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active) - check_urgent_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth - &s->dummy_single[1], // double* frac_urg_bandwidth_mall - &mode_lib->ms.support.UrgVactiveBandwidthSupport, - &mode_lib->ms.support.PrefetchBandwidthSupported, - - mode_lib->soc.mall_allocated_for_dcn_mbytes, - mode_lib->ms.support.non_urg_bandwidth_required, - mode_lib->ms.support.urg_vactive_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_required, - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported; - dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported); - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) { - mode_lib->ms.support.PrefetchSupported = false; - dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]); - } - } - -#ifdef DML_GLOBAL_PREFETCH_CHECK - if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { - CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; - CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; - CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; - CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; - CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; - CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; - CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; - CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; - CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; - CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; - CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; - CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; - CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; - CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; - if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) - CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; - - CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / - ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); - - // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible - CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; - CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; - mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); - s->recalc_prefetch_done = 1; - s->recalc_prefetch_schedule = 1; - } -#endif - } // prefetch schedule ok, do urg bw and flip schedule - } while (s->recalc_prefetch_schedule); - - // Flip Schedule - // Both prefetch schedule and BW okay - if (mode_lib->ms.support.PrefetchSupported == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_qual, // no flip - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( - s->HostVMInefficiencyFactor, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; - - } - - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - CalculateFlipSchedule( - &mode_lib->scratch, - display_cfg->plane_descriptors[k].immediate_flip, - 1, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ms.Tno_bw_flip[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->ms.meta_row_bytes[k], - s->meta_row_height_luma[k], - s->meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, - - /* Output */ - &mode_lib->ms.dst_y_per_vm_flip[k], - &mode_lib->ms.dst_y_per_row_flip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } - - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth_flip - &mode_lib->ms.support.ImmediateFlipSupport, - - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_flip, - mode_lib->ms.support.non_urg_bandwidth_required_flip, - mode_lib->ms.support.urg_bandwidth_available); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - } else { // if prefetch not support, assume iflip is not supported too - mode_lib->ms.support.ImmediateFlipSupport = false; - } - - s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; - s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency; - s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr; - s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us; - s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us; - s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us; - s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us; - s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us; - s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us; - s->mSOCParameters.USRRetrainingLatency = 0; - s->mSOCParameters.SMNLatency = 0; - s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); - s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; - s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; - - CalculateWatermarks_params->display_cfg = display_cfg; - CalculateWatermarks_params->USRRetrainingRequired = false; - CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines; - CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits; - CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes; - CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK; - CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings; - CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change; - CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes; - CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters; - CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes; - CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK; - CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep; - CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY; - CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; - CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; - CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; - CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; - CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; - CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY; - CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC; - CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler; - CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler; - CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled; - CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte; - CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma; - CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma; - - // Output - CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark - CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport; - CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported; - CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[] - CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[] - CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport; - CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported; - CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported - CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport; - CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support; - CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin; - CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; - - CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - - calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); - } - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); - // End of Prefetch Check + dml_core_ms_prefetch_check(mode_lib, display_cfg); mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; @@ -9546,8 +9475,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); - dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); + DML_LOG_VERBOSE("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); + DML_LOG_VERBOSE("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ @@ -9597,17 +9526,17 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMALLSize && mode_lib->ms.support.g6_temp_read_support && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) { - dml2_printf("DML::%s: mode is supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is supported\n", __func__); mode_lib->ms.support.ModeSupport = true; } else { - dml2_printf("DML::%s: mode is NOT supported\n", __func__); + DML_LOG_VERBOSE("DML::%s: mode is NOT supported\n", __func__); mode_lib->ms.support.ModeSupport = false; } } // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0). - dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); - dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); + DML_LOG_VERBOSE("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; @@ -9623,8 +9552,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k]; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); - dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]); #endif } @@ -9632,7 +9561,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!mode_lib->ms.support.ModeSupport) dml2_print_mode_support_info(&mode_lib->ms.support, true); - dml2_printf("DML::%s: --- DONE --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- DONE --- \n", __func__); #endif return mode_lib->ms.support.ModeSupport; @@ -9642,18 +9571,18 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support { unsigned int result; - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); result = dml_core_mode_support(in_out_params); if (result) *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; - dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); return result; } @@ -9687,19 +9616,19 @@ static void CalculatePixelDeliveryTimes( double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); - dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); - dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); - dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); - dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); - dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); - dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); - dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); - dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used); + DML_LOG_VERBOSE("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz); + DML_LOG_VERBOSE("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]); #endif if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) { DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz; @@ -9733,10 +9662,10 @@ static void CalculatePixelDeliveryTimes( } } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); #endif } @@ -9752,12 +9681,12 @@ static void CalculatePixelDeliveryTimes( DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k]; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); - dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); - dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]); #endif } } @@ -9853,14 +9782,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); - dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]); #endif } @@ -9881,7 +9810,7 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE else p->time_per_tdlut_group[k] = 0; - dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]); if (p->display_cfg->gpuvm_enable == true) { if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) { @@ -9897,14 +9826,14 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } - dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); - dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); - dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); - dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); - dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub); p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub; @@ -9928,9 +9857,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } - dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); - dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); - dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); + DML_LOG_VERBOSE("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub; @@ -9945,17 +9874,17 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE p->time_per_pte_group_flip_chroma[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); - dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); - dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]); #endif } } // CalculateMetaAndPTETimes @@ -9991,18 +9920,18 @@ static void CalculateVMGroupAndRequestTimes( double line_time; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); + DML_LOG_VERBOSE("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); #endif for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) { double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); - dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); - dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable); + DML_LOG_VERBOSE("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]); #endif if (display_cfg->gpuvm_enable) { @@ -10071,13 +10000,13 @@ static void CalculateVMGroupAndRequestTimes( else TimePerVMRequestFlip[k] = 0.0; - dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); - dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); - dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref); - dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %d\n", __func__, k, num_group_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %d\n", __func__, k, num_group_per_lower_vm_stage_flip); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %d\n", __func__, k, num_req_per_lower_vm_stage_pref); + DML_LOG_VERBOSE("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %d\n", __func__, k, num_req_per_lower_vm_stage_flip); if (display_cfg->gpuvm_max_page_table_levels > 2) { TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; @@ -10094,10 +10023,10 @@ static void CalculateVMGroupAndRequestTimes( } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); - dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); - dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); #endif } } @@ -10113,7 +10042,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc unsigned int SingleVTotal = 0; bool SameTiming = true; bool FoundCriticalSurface = false; - double LastZ8StutterPeriod = 0; memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); @@ -10127,9 +10055,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma; @@ -10142,9 +10070,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); - dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); - dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1); + DML_LOG_VERBOSE("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma); #endif l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1; l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma; @@ -10160,19 +10088,19 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); - dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); - dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); - dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); - dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); - dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); + DML_LOG_VERBOSE("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled); + DML_LOG_VERBOSE("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma); + DML_LOG_VERBOSE("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction); - dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); - dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); - dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0); + DML_LOG_VERBOSE("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs); + DML_LOG_VERBOSE("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte); + DML_LOG_VERBOSE("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte); #endif if (l->AverageDCCZeroSizeFraction == 1) { l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth; @@ -10189,10 +10117,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); - dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64)); + DML_LOG_VERBOSE("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate)); #endif } else { l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate, @@ -10200,16 +10128,16 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); - dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate); #endif } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); - dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); - dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries); + DML_LOG_VERBOSE("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries); + DML_LOG_VERBOSE("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); #endif *p->StutterPeriod = 0; @@ -10220,15 +10148,15 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]); l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); - dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); - dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); - dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); - dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY); + DML_LOG_VERBOSE("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath); + DML_LOG_VERBOSE("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY); #endif if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) { @@ -10248,17 +10176,17 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); - dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); - dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); - dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); - dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); - dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); - dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface); + DML_LOG_VERBOSE("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface); #endif } } @@ -10276,14 +10204,14 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); - dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); - dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); - dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); - dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); - dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); - dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); - dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); + DML_LOG_VERBOSE("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate); + DML_LOG_VERBOSE("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0); + DML_LOG_VERBOSE("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0); + DML_LOG_VERBOSE("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024); + DML_LOG_VERBOSE("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth); + DML_LOG_VERBOSE("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK); #endif l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer @@ -10292,10 +10220,10 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); - dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); - dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); + DML_LOG_VERBOSE("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64)); + DML_LOG_VERBOSE("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); #endif l->TotalActiveWriteback = 0; memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool)); @@ -10324,9 +10252,9 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc if (l->TotalActiveWriteback == 0) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); - dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime); + DML_LOG_VERBOSE("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); #endif *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100; *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100; @@ -10339,11 +10267,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc *p->Z8NumberOfStutterBurstsPerFrame = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif if (*p->StutterEfficiencyNotIncludingVBlank > 0) { @@ -10358,7 +10286,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) { - LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; + //LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod; if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) { *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank; } else { @@ -10370,25 +10298,25 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); - dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming); - dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); - dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); - dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); - dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); - dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); - dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); - dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); - dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); + DML_LOG_VERBOSE("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG); + DML_LOG_VERBOSE("DML::%s: SameTiming = %u\n", __func__, SameTiming); + DML_LOG_VERBOSE("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings); + DML_LOG_VERBOSE("DML::%s: LastZ8StutterPeriod = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod : 0); + DML_LOG_VERBOSE("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark); + DML_LOG_VERBOSE("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime); + DML_LOG_VERBOSE("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod); + DML_LOG_VERBOSE("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency); + DML_LOG_VERBOSE("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank); + DML_LOG_VERBOSE("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); - dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); + DML_LOG_VERBOSE("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); + DML_LOG_VERBOSE("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); + DML_LOG_VERBOSE("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -10422,7 +10350,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex double max_uclk_mhz = 0; double min_return_latency_in_DCFCLK_cycles = 0; - dml2_printf("DML::%s: --- START --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: --- START --- \n", __func__); memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch)); memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program)); @@ -10444,13 +10372,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 || cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); if (cfg_support_info->stream_support_info[stream_index].odms_used > 1) - DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); + DML_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1); switch (cfg_support_info->stream_support_info[stream_index].odms_used) { case (4): @@ -10476,51 +10404,51 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dppclk[k] > 0); + DML_ASSERT(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; - dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); + DML_LOG_VERBOSE("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; - DML2_ASSERT(mode_lib->mp.Dcfclk > 0); - DML2_ASSERT(mode_lib->mp.FabricClock > 0); - DML2_ASSERT(mode_lib->mp.dram_bw_mbps > 0); - DML2_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); - DML2_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); - DML2_ASSERT(mode_lib->mp.Dispclk > 0); - DML2_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); - DML2_ASSERT(s->SOCCLK > 0); + DML_ASSERT(mode_lib->mp.Dcfclk > 0); + DML_ASSERT(mode_lib->mp.FabricClock > 0); + DML_ASSERT(mode_lib->mp.dram_bw_mbps > 0); + DML_ASSERT(mode_lib->mp.uclk_freq_mhz > 0); + DML_ASSERT(mode_lib->mp.GlobalDPPCLK > 0); + DML_ASSERT(mode_lib->mp.Dispclk > 0); + DML_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0); + DML_ASSERT(s->SOCCLK > 0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); - dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); - dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); - dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); - dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); - dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); - dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); + DML_LOG_VERBOSE("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes); + DML_LOG_VERBOSE("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk); + DML_LOG_VERBOSE("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock); + DML_LOG_VERBOSE("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps); + DML_LOG_VERBOSE("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz); + DML_LOG_VERBOSE("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk); for (k = 0; k < s->num_active_planes; ++k) { - dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); + DML_LOG_VERBOSE("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]); } - dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); - dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); - dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); - dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); - dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); - dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); + DML_LOG_VERBOSE("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK); + DML_LOG_VERBOSE("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep); + DML_LOG_VERBOSE("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK); + DML_LOG_VERBOSE("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index); + DML_LOG_VERBOSE("DML::%s: min_clk_table min_fclk_khz = %ld\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz); + DML_LOG_VERBOSE("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config)); for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) { - dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); - dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]); + DML_LOG_VERBOSE("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]); } for (k = 0; k < s->num_active_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + DML_LOG_VERBOSE("DML::%s: plane_%d: reserved_vblank_time_ns = %lu\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -10617,8 +10545,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); } CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; @@ -11097,7 +11025,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { - bool cursor_not_enough_urgent_latency_hiding = 0; + bool cursor_not_enough_urgent_latency_hiding = false; s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); @@ -11173,8 +11101,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.WritebackDelay[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); + DML_LOG_VERBOSE("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]); #endif } @@ -11183,7 +11111,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); #endif if (s->num_active_planes > 1) { @@ -11219,12 +11147,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->DestinationLineTimesForPrefetchLessThan2 = false; s->VRatioPrefetchMoreThanMax = false; - dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); + DML_LOG_VERBOSE("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe; - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, @@ -11261,7 +11189,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k); #endif CalculatePrefetchSchedule_params->display_cfg = display_cfg; CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch; @@ -11356,7 +11284,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.impacted_prefetch_margin_us[k] = 0; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); #endif mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k]; } // for k @@ -11366,9 +11294,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.NoTimeToPrefetch[k] == true || mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] || mode_lib->mp.DSTYAfterScaler[k] > 8) { - dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); - dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); - dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]); mode_lib->mp.PrefetchModeSupported = false; } if (mode_lib->mp.dst_y_prefetch[k] < 2) @@ -11377,24 +11305,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; - dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); mode_lib->mp.PrefetchModeSupported = false; } } if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) { - dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); - dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); + DML_LOG_VERBOSE("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + DML_LOG_VERBOSE("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); mode_lib->mp.PrefetchModeSupported = false; } - dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, + DML_LOG_VERBOSE("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__, mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup); // Prefetch schedule OK, now check prefetch bw @@ -11422,24 +11350,24 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); - dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]); - dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); - dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); - dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); - dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); - dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); - dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); - dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); - dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]); + DML_LOG_VERBOSE("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]); #endif } @@ -11503,11 +11431,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.PrefetchModeSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__); for (k = 0; k < s->num_active_planes; ++k) { if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) { - dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]); mode_lib->mp.PrefetchModeSupported = false; } } @@ -11533,12 +11461,12 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k = %u\n", __func__, k); - dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); - dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); - dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); - dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); - dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); + DML_LOG_VERBOSE("DML::%s: k = %u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]); + DML_LOG_VERBOSE("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]); + DML_LOG_VERBOSE("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]); + DML_LOG_VERBOSE("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]); + DML_LOG_VERBOSE("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes); #endif } for (k = 0; k < s->num_active_planes; ++k) { @@ -11631,13 +11559,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.urg_bandwidth_available); if (!mode_lib->mp.ImmediateFlipSupported) - dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__); + DML_LOG_VERBOSE("DML::%s: Bandwidth not sufficient for flip!", __func__); for (k = 0; k < s->num_active_planes; ++k) { if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) { mode_lib->mp.ImmediateFlipSupported = false; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k); #endif } } @@ -11650,28 +11578,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported); for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); - dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); - dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); - dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); + DML_LOG_VERBOSE("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable); + DML_LOG_VERBOSE("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported); + DML_LOG_VERBOSE("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported); #endif - dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]); } for (k = 0; k < s->num_active_planes; ++k) - dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) { - dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); + DML_LOG_VERBOSE("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__); } else { - dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); + DML_LOG_VERBOSE("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__); // DCC Configuration for (k = 0; k < s->num_active_planes; ++k) { #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); + DML_LOG_VERBOSE("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k); #endif CalculateDCCConfiguration( display_cfg->plane_descriptors[k].surface.dcc.enable, @@ -11780,8 +11708,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); - dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); - dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); + DML_LOG_VERBOSE("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); + DML_LOG_VERBOSE("DML::%s: DEBUG PixelClock = %ld kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); //Display Pipeline Delivery Time in Prefetch, Groups CalculatePixelDeliveryTimes( @@ -11893,15 +11821,15 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); - dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); - dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin); + DML_LOG_VERBOSE("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]); #endif mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin; @@ -11920,9 +11848,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k]; if (s->blank_lines_remaining < 0) { - dml2_printf("ERROR: Vstartup is larger than vblank!?\n"); + DML_LOG_VERBOSE("ERROR: Vstartup is larger than vblank!?\n"); s->blank_lines_remaining = 0; - DML2_ASSERT(0); + DML_ASSERT(0); } mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup; @@ -11936,18 +11864,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); - dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); - dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); - dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); - dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); - dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); - dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); - dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); - dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); - dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); - dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); - dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, HTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VTotal = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total); + DML_LOG_VERBOSE("DML::%s: k=%u, VActive = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active); + DML_LOG_VERBOSE("DML::%s: k=%u, VFrontPorch = %lu\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch); + DML_LOG_VERBOSE("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]); #endif } @@ -11969,9 +11897,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); - dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + DML_LOG_VERBOSE("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); #endif } @@ -12051,28 +11979,28 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz; mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles; mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles); - DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); + DML_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); - dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); - dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); - dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles); - dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles); - dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); - dml2_printf("DML::%s: --- END --- \n", __func__); + DML_LOG_VERBOSE("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz); + DML_LOG_VERBOSE("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz); + DML_LOG_VERBOSE("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz); + DML_LOG_VERBOSE("DML::%s: min_return_uclk_cycles = %ld\n", __func__, min_return_uclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_fclk_cycles = %ld\n", __func__, min_return_fclk_cycles); + DML_LOG_VERBOSE("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: --- END --- \n", __func__); #endif return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported); } bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { - dml2_printf("DML::%s: ------------- START ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: result = %0d\n", __func__, result); - dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); + DML_LOG_VERBOSE("DML::%s: result = %0d\n", __func__, result); + DML_LOG_VERBOSE("DML::%s: ------------- DONE ----------\n", __func__); return result; } @@ -12130,16 +12058,16 @@ void dml2_core_calcs_get_dpte_row_height( unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY; unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1); - dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); - dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); - dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); - dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); - dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); - dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); - dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); - dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); + DML_LOG_VERBOSE("DML: %s: is_plane1 = %u\n", __func__, is_plane1); + DML_LOG_VERBOSE("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel); + DML_LOG_VERBOSE("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes); + DML_LOG_VERBOSE("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes); + DML_LOG_VERBOSE("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth); + DML_LOG_VERBOSE("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight); + DML_LOG_VERBOSE("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma); + DML_LOG_VERBOSE("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma); + DML_LOG_VERBOSE("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif unsigned int dummy_integer[21]; @@ -12193,16 +12121,16 @@ void dml2_core_calcs_get_dpte_row_height( CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); + DML_LOG_VERBOSE("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height); #endif } static bool is_dual_plane(enum dml2_source_format_class source_format) { - bool ret_val = 0; + bool ret_val = false; if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) - ret_val = 1; + ret_val = true; return ret_val; } @@ -12220,6 +12148,8 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + wm_regs->sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -12246,11 +12176,11 @@ void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); - dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); - dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); - dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); - dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position); + DML_LOG_VERBOSE("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset); + DML_LOG_VERBOSE("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset); #endif cursor_dlg_regs->chunk_hdl_adjust = 3; @@ -12286,7 +12216,7 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, double stored_swath_c_bytes; bool is_phantom_pipe; - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx); pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024); min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes); @@ -12329,19 +12259,19 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) { unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear); #endif - DML2_ASSERT(p0_pte_row_height_linear >= 8); + DML_ASSERT(p0_pte_row_height_linear >= 8); rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3; if (dual_plane) { unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); + DML_LOG_VERBOSE("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear); #endif if (sw_mode == dml2_sw_linear) { - DML2_ASSERT(p1_pte_row_height_linear >= 8); + DML_ASSERT(p1_pte_row_height_linear >= 8); } rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3; } @@ -12375,12 +12305,12 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) { detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr); #endif } else { detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr); #endif } } @@ -12388,15 +12318,15 @@ static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs, rq_regs->plane1_base_address = detile_buf_plane1_addr; #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); - dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); - dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); - dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); - dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); - dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); + DML_LOG_VERBOSE("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes); + DML_LOG_VERBOSE("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr); + DML_LOG_VERBOSE("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address); #endif - //dml2_printf_rq_regs_st(rq_regs); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + //DML_LOG_VERBOSE_rq_regs_st(rq_regs); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } static void rq_dlg_get_dlg_reg( @@ -12411,10 +12341,10 @@ static void rq_dlg_get_dlg_reg( memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals)); - dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx); l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx); - DML2_ASSERT(l->plane_idx < DML2_MAX_PLANES); + DML_ASSERT(l->plane_idx < DML2_MAX_PLANES); l->source_format = dml2_444_8; l->odm_mode = dml2_odm_mode_bypass; @@ -12444,18 +12374,18 @@ static void rq_dlg_get_dlg_reg( l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000; l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz; - dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); - dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); - dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); - dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz); - dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); - dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); + DML_LOG_VERBOSE("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx); + DML_LOG_VERBOSE("DML_DLG: %s: htotal = %d\n", __func__, l->htotal); + DML_LOG_VERBOSE("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: soc.refclk_mhz = %d\n", __func__, mode_lib->soc.dchub_refclk_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced); - DML2_ASSERT(l->refclk_freq_in_mhz != 0); - DML2_ASSERT(l->pclk_freq_in_mhz != 0); - DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0); + DML_ASSERT(l->refclk_freq_in_mhz != 0); + DML_ASSERT(l->pclk_freq_in_mhz != 0); + DML_ASSERT(l->ref_freq_to_pix_freq < 4.0); // Need to figure out which side of odm combine we're in // Assume the pipe instance under the same plane is in order @@ -12484,14 +12414,14 @@ static void rq_dlg_get_dlg_reg( l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.) disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq); - dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); - dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); - dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); - dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane); + DML_LOG_VERBOSE("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine); + DML_LOG_VERBOSE("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor); } - dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end); - DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13)); disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19)); disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8)); @@ -12500,20 +12430,20 @@ static void rq_dlg_get_dlg_reg( l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]]; l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); - dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); - dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); + DML_LOG_VERBOSE("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start); + DML_LOG_VERBOSE("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq); l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]); disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0; - dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); + DML_LOG_VERBOSE("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0); l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]); - dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); - dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler); l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]]; @@ -12521,28 +12451,28 @@ static void rq_dlg_get_dlg_reg( l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]]; l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); - dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); - dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank); if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) { - DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); + DML_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank)); } l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]]; l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]]; - dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); - dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c); // Active l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l); l->refcyc_per_line_delivery_pre_c = 0.0; l->refcyc_per_line_delivery_c = 0.0; @@ -12551,8 +12481,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c); } disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12561,8 +12491,8 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l); l->refcyc_per_req_delivery_pre_c = 0.0; l->refcyc_per_req_delivery_c = 0.0; @@ -12570,16 +12500,16 @@ static void rq_dlg_get_dlg_reg( l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz; - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); - dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c); + DML_LOG_VERBOSE("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c); } // TTU - Cursor - DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); + DML_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1); // Assign to register structures disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2)); - DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); + DML_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18)); disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk @@ -12592,10 +12522,10 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19)); disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19)); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); - dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); + DML_LOG_VERBOSE("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz); @@ -12662,11 +12592,11 @@ static void rq_dlg_get_dlg_reg( disp_ttu_regs->qos_ramp_disable_c = 0; disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz); - // CHECK for HW registers' range, DML2_ASSERT or clamp - DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); - DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); + // CHECK for HW registers' range, DML_ASSERT or clamp + DML_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13)); + DML_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13)); if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1); @@ -12680,16 +12610,16 @@ static void rq_dlg_get_dlg_reg( disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1); - DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); - DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8); + DML_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13)); if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1); l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1; } if (l->dual_plane) { if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) { - dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); + DML_LOG_VERBOSE("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1); l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1; } } @@ -12700,20 +12630,20 @@ static void rq_dlg_get_dlg_reg( if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23)) disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1); } - DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13)); if (l->dual_plane) { - DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13)); } - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); - DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); - DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13)); + DML_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14)); + DML_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24)); - dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); + DML_LOG_VERBOSE("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx); } } @@ -12736,11 +12666,11 @@ static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, co arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); - dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); - dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); - dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); - dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); + DML_LOG_VERBOSE("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); + DML_LOG_VERBOSE("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit); + DML_LOG_VERBOSE("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes); + DML_LOG_VERBOSE("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req); + DML_LOG_VERBOSE("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis); #endif } @@ -13013,10 +12943,10 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); - dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); - dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); - dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us); + DML_LOG_VERBOSE("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us); + DML_LOG_VERBOSE("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us); + DML_LOG_VERBOSE("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines); + DML_LOG_VERBOSE("DML::%s: vblank_reserved_time_us = %u\n", __func__, out->vblank_reserved_time_us); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 4e502f0a6d20..bdee6ad7bc59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -1078,6 +1078,8 @@ struct dml2_core_calcs_mode_programming_locals { enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; unsigned int lb_source_lines_l[DML2_MAX_PLANES]; unsigned int lb_source_lines_c[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; }; struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 2504d9c2ec34..7a220c0141c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -82,7 +82,7 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -145,7 +145,7 @@ bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) val = 0; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -208,7 +208,7 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) val = 1; break; default: - DML2_ASSERT(0); + DML_ASSERT(0); break; } return val; @@ -216,104 +216,104 @@ bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { - dml2_printf("DML: ===================================== \n"); - dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + DML_LOG_VERBOSE("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: DML_MODE_SUPPORT_INFO_ST\n"); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) - dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + DML_LOG_VERBOSE("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + DML_LOG_VERBOSE("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) - dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + DML_LOG_VERBOSE("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) - dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); if (!fail_only || support->BPPForMultistreamNotIndicated == 1) - dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + DML_LOG_VERBOSE("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) - dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + DML_LOG_VERBOSE("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + DML_LOG_VERBOSE("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) - dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + DML_LOG_VERBOSE("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) - dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + DML_LOG_VERBOSE("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + DML_LOG_VERBOSE("DML: support: P2IWith420 = %d\n", support->P2IWith420); if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + DML_LOG_VERBOSE("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + DML_LOG_VERBOSE("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) - dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) - dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + DML_LOG_VERBOSE("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + DML_LOG_VERBOSE("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + DML_LOG_VERBOSE("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) - dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + DML_LOG_VERBOSE("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + DML_LOG_VERBOSE("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) - dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + DML_LOG_VERBOSE("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) - dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) - dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + DML_LOG_VERBOSE("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + DML_LOG_VERBOSE("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + DML_LOG_VERBOSE("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + DML_LOG_VERBOSE("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + DML_LOG_VERBOSE("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + DML_LOG_VERBOSE("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + DML_LOG_VERBOSE("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + DML_LOG_VERBOSE("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + DML_LOG_VERBOSE("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + DML_LOG_VERBOSE("DML: support: CursorSupport = %d\n", support->CursorSupport); if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + DML_LOG_VERBOSE("DML: support: PitchSupport = %d\n", support->PitchSupport); if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + DML_LOG_VERBOSE("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) - dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + DML_LOG_VERBOSE("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + DML_LOG_VERBOSE("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + DML_LOG_VERBOSE("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) - dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + DML_LOG_VERBOSE("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) - dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + DML_LOG_VERBOSE("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) - dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + DML_LOG_VERBOSE("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + DML_LOG_VERBOSE("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); if (!fail_only || support->g6_temp_read_support == 0) - dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + DML_LOG_VERBOSE("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + DML_LOG_VERBOSE("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + DML_LOG_VERBOSE("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); if (!fail_only || support->ModeSupport == 0) - dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - dml2_printf("DML: ===================================== \n"); + DML_LOG_VERBOSE("DML: support: ModeSupport = %d\n", support->ModeSupport); + DML_LOG_VERBOSE("DML: ===================================== \n"); } const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) @@ -358,9 +358,9 @@ void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_di out_bpp[k] = 0; } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); - dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); - dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); + DML_LOG_VERBOSE("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + DML_LOG_VERBOSE("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + DML_LOG_VERBOSE("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); #endif } } @@ -391,7 +391,7 @@ unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const } #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); + DML_LOG_VERBOSE("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); #endif return num_active_pipes; } @@ -452,7 +452,7 @@ unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw else if (sw_mode == dml2_gfx11_sw_256kb_r_x) return 262144; else { - DML2_ASSERT(0); + DML_ASSERT(0); return 256; }; } @@ -498,8 +498,8 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; else { - dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); - DML2_ASSERT(0); + DML_LOG_VERBOSE("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML_ASSERT(0); } return version; @@ -511,7 +511,7 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co unsigned int index = 0; for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + DML_LOG_VERBOSE("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %ld\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); if (i == 0) index = 0; @@ -524,8 +524,8 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co } } #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, index); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, index); #endif return index; } @@ -533,32 +533,32 @@ unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, co unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) { unsigned int i; - bool clk_entry_found = 0; + bool clk_entry_found = false; for (i = 0; i < clk_table->uclk.num_clk_values; i++) { - dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + DML_LOG_VERBOSE("DML::%s: clk_table.uclk.clk_values_khz[%d] = %ld\n", __func__, i, clk_table->uclk.clk_values_khz[i]); if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { - clk_entry_found = 1; + clk_entry_found = true; break; } } if (!clk_entry_found) - DML2_ASSERT(clk_entry_found); + DML_ASSERT(clk_entry_found); #if defined(__DML_VBA_DEBUG__) - dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); - dml2_printf("DML::%s: index = %d\n", __func__, i); + DML_LOG_VERBOSE("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + DML_LOG_VERBOSE("DML::%s: index = %d\n", __func__, i); #endif return i; } bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) { - bool ret_val = 0; + bool ret_val = false; if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha)) - ret_val = 1; + ret_val = true; return ret_val; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 15507926f3a4..f486b090bbfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -754,6 +754,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); @@ -768,6 +770,8 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit_z8 = (int unsigned)(mode_lib->mp.Watermark.Z8StutterExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index f4b1a7d02d42..a265f254152c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -182,6 +182,10 @@ static bool build_min_clock_table(const struct dml2_soc_bb *soc_bb, struct dml2_ min_table->max_clocks_khz.dtbclk = soc_bb->clk_table.dtbclk.clk_values_khz[soc_bb->clk_table.dtbclk.num_clk_values - 1]; min_table->max_clocks_khz.phyclk = soc_bb->clk_table.phyclk.clk_values_khz[soc_bb->clk_table.phyclk.num_clk_values - 1]; + min_table->max_ss_clocks_khz.dispclk = (unsigned int)((double)min_table->max_clocks_khz.dispclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dppclk = (unsigned int)((double)min_table->max_clocks_khz.dppclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_ss_clocks_khz.dtbclk = (unsigned int)((double)min_table->max_clocks_khz.dtbclk / (1.0 + soc_bb->dcn_downspread_percent / 100.0)); + min_table->max_clocks_khz.dcfclk = soc_bb->clk_table.dcfclk.clk_values_khz[soc_bb->clk_table.dcfclk.num_clk_values - 1]; min_table->max_clocks_khz.fclk = soc_bb->clk_table.fclk.clk_values_khz[soc_bb->clk_table.fclk.num_clk_values - 1]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index f50662b83296..d88b3e0082dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -659,7 +659,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) for (i = 1; i <= PMO_DCN4_MAX_DISPLAYS; i++) { switch (i) { case 1: - DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -670,7 +670,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 2: - DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -681,7 +681,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 3: - DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( @@ -692,7 +692,7 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 4: - DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); + DML_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ pmo_dcn4_fams2_expand_base_pstate_strategies( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c index dc2ce5e77f57..4a7c4c62111e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -761,7 +761,7 @@ bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache total_mcaches_required--; } } - dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + DML_LOG_VERBOSE("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { result = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c deleted file mode 100644 index c506667897c4..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_debug.h" - -int dml2_log_internal(const char *format, ...) -{ - return 0; -} - -int dml2_printf(const char *format, ...) -{ -#ifdef _DEBUG -#ifdef _DEBUG_PRINTS - int result; - va_list args; - va_start(args, format); - - result = vprintf(format, args); - - va_end(args); - - return result; -#else - return 0; -#endif -#else - return 0; -#endif -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index bfe6f236d2e4..b226225103c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -5,55 +5,62 @@ #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ -#ifndef DML2_ASSERT -#define DML2_ASSERT(condition) ((void)0) -#endif +#include "os_types.h" +#define DML_ASSERT(condition) ASSERT(condition) +#define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN +#define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) -/* - * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart. - * DML_LOG_ERROR - unexpected but recoverable failures inside DML - * DML_LOG_WARN - unexpected inputs or events to DML - * DML_LOG_INFO - high level tracing of DML interfaces - * DML_LOG_DEBUG - detailed tracing of DML internal components - * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure - */ -#if !defined(DML_LOG_LEVEL) -#if defined(_DEBUG) && defined(_DEBUG_PRINTS) -/* for backward compatibility with old macros */ -#define DML_LOG_LEVEL 5 -#else -#define DML_LOG_LEVEL 0 -#endif -#endif +/* ASSERT with message output */ +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("DML ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) -#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) -#if DML_LOG_LEVEL >= 1 -#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +/* fatal errors for unrecoverable DML states until a full reset */ +#define DML_LOG_LEVEL_FATAL 0 +/* unexpected but recoverable failures inside DML */ +#define DML_LOG_LEVEL_ERROR 1 +/* unexpected inputs or events to DML */ +#define DML_LOG_LEVEL_WARN 2 +/* high level tracing of DML interfaces */ +#define DML_LOG_LEVEL_INFO 3 +/* detailed tracing of DML internal components */ +#define DML_LOG_LEVEL_DEBUG 4 +/* detailed tracing of DML calculation procedure */ +#define DML_LOG_LEVEL_VERBOSE 5 + +#ifndef DML_LOG_LEVEL +#define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT +#endif /* #ifndef DML_LOG_LEVEL */ + +#define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR +#define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) #else #define DML_LOG_ERROR(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 2 -#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN +#define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) #else #define DML_LOG_WARN(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 3 -#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO +#define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) #else #define DML_LOG_INFO(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 4 -#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG +#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL("[DML DEBUG] "fmt, ## __VA_ARGS__) #else #define DML_LOG_DEBUG(fmt, ...) ((void)0) #endif -#if DML_LOG_LEVEL >= 5 -#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL("[DML VERBOSE] "fmt, ## __VA_ARGS__) #else #define DML_LOG_VERBOSE(fmt, ...) ((void)0) #endif - -int dml2_log_internal(const char *format, ...); -int dml2_printf(const char *format, ...); - -#endif +#endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index d8d01dceacdd..00688b9f1df4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -37,6 +37,12 @@ struct dml2_mcg_min_clock_table { unsigned int dcfclk; } max_clocks_khz; + struct { + unsigned int dispclk; + unsigned int dppclk; + unsigned int dtbclk; + } max_ss_clocks_khz; + struct { unsigned int dprefclk; unsigned int xtalclk; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index a966abd40788..5f1b49a50049 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -1082,22 +1082,22 @@ bool dml2_map_dc_pipes(struct dml2_context *ctx, struct dc_state *state, const s if (stream_disp_cfg_index >= disp_cfg_index_max) continue; - if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { - scratch.odm_info.odm_factor = 1; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { - scratch.odm_info.odm_factor = 2; - } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { - scratch.odm_info.odm_factor = 4; - } else { - ASSERT(false); - scratch.odm_info.odm_factor = 1; - } - + if (ctx->architecture == dml2_architecture_20) { + if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_bypass) { + scratch.odm_info.odm_factor = 1; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_2to1) { + scratch.odm_info.odm_factor = 2; + } else if (ODMMode[stream_disp_cfg_index] == dml_odm_mode_combine_4to1) { + scratch.odm_info.odm_factor = 4; + } else { + ASSERT(false); + scratch.odm_info.odm_factor = 1; + } + } else if (ctx->architecture == dml2_architecture_21) { /* After DML2.1 update, ODM interpretation needs to change and is no longer same as for DML2.0. * This is not an issue with new resource management logic. This block ensure backcompat * with legacy pipe management with updated DML. * */ - if (ctx->architecture == dml2_architecture_21) { if (ODMMode[stream_disp_cfg_index] == 1) { scratch.odm_info.odm_factor = 1; } else if (ODMMode[stream_disp_cfg_index] == 2) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index ab6baf269801..5de775fd8fce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -896,7 +896,7 @@ static void populate_dummy_dml_surface_cfg(struct dml_surface_cfg_st *out, unsig out->SurfaceWidthC[location] = in->timing.h_addressable; out->SurfaceHeightC[location] = in->timing.v_addressable; out->PitchY[location] = ((out->SurfaceWidthY[location] + 127) / 128) * 128; - out->PitchC[location] = 0; + out->PitchC[location] = 1; out->DCCEnable[location] = false; out->DCCMetaPitchY[location] = 0; out->DCCMetaPitchC[location] = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index e89571874185..525b7d04bf84 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -663,7 +663,10 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s dml2_copy_clocks_to_dc_state(&out_clks, context); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.a, &dml2->v20.dml_core_ctx); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.b, &dml2->v20.dml_core_ctx); - memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); + if (context->streams[0]->sink->link->dc->caps.is_apu) + dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.dml_core_ctx); + else + memcpy(&context->bw_ctx.bw.dcn.watermarks.c, &dml2->v20.g6_temp_read_watermark_set, sizeof(context->bw_ctx.bw.dcn.watermarks.c)); dml2_extract_watermark_set(&context->bw_ctx.bw.dcn.watermarks.d, &dml2->v20.dml_core_ctx); dml2_extract_writeback_wm(context, &dml2->v20.dml_core_ctx); //copy for deciding zstate use diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 785226945699..5100f269368e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -40,6 +40,7 @@ struct dc_sink; struct dc_stream_state; struct resource_context; struct display_stream_compressor; +struct dc_mcache_params; // Configuration of the MALL on the SoC struct dml2_soc_mall_info { @@ -107,6 +108,7 @@ struct dml2_dc_callbacks { unsigned int (*get_max_flickerless_instant_vtotal_increase)( struct dc_stream_state *stream, bool is_gaming); + bool (*allocate_mcache)(struct dc_state *context, const struct dc_mcache_params *mcache_params); }; struct dml2_dc_svp_callbacks { diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index abf439e743f2..2d70586cef40 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -790,8 +790,7 @@ static bool dpp3_program_blnd_lut(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_BLNDGAM_CONTROL, 0, CM_BLNDGAM_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_blnd_lut(dpp_base, false); + dpp3_power_on_blnd_lut(dpp_base, false); return false; } @@ -1204,8 +1203,7 @@ static bool dpp3_program_shaper(struct dpp *dpp_base, if (params == NULL) { REG_SET(CM_SHAPER_CONTROL, 0, CM_SHAPER_LUT_MODE, 0); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_shaper(dpp_base, false); + dpp3_power_on_shaper(dpp_base, false); return false; } @@ -1399,8 +1397,7 @@ static bool dpp3_program_3dlut(struct dpp *dpp_base, if (params == NULL) { dpp3_set_3dlut_mode(dpp_base, LUT_BYPASS, false, false); - if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.cm) - dpp3_power_on_hdr3dlut(dpp_base, false); + dpp3_power_on_hdr3dlut(dpp_base, false); return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index 62b7012cda43..f7a373a3d70a 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -138,7 +138,7 @@ bool dpp35_construct( dpp->base.funcs = &dcn35_dpp_funcs; // w/a for cursor memory stuck in LS by programming DISPCLK_R_GATE_DISABLE, limit w/a to some ASIC revs - if (dpp->base.ctx->asic_id.hw_internal_rev <= 0x10) + if (dpp->base.ctx->asic_id.hw_internal_rev < 0x40) dpp->dispclk_r_gate_disable = true; return ret; } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index 75128fd34306..bd1b9aef6d5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -57,13 +57,6 @@ static const struct dsc_funcs dcn20_dsc_funcs = { #define DC_LOGGER \ dsc->ctx->logger -enum dsc_bits_per_comp { - DSC_BPC_8 = 8, - DSC_BPC_10 = 10, - DSC_BPC_12 = 12, - DSC_BPC_UNKNOWN -}; - /* API functions (external or via structure->function_pointer) */ void dsc2_construct(struct dcn20_dsc *dsc, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h index 1fb90b52b814..a9c04fc95bd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h @@ -457,6 +457,12 @@ type DSCRM_DSC_DOUBLE_BUFFER_REG_UPDATE_PENDING; \ type DSCRM_DSC_FORWARD_EN_STATUS +enum dsc_bits_per_comp { + DSC_BPC_8 = 8, + DSC_BPC_10 = 10, + DSC_BPC_12 = 12, + DSC_BPC_UNKNOWN +}; struct dcn20_dsc_registers { uint32_t DSC_TOP_CONTROL; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 4893b793fec0..4222679fd4c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -45,12 +45,6 @@ static const struct dsc_funcs dcn401_dsc_funcs = { #define DC_LOGGER \ dsc->ctx->logger -enum dsc_bits_per_comp { - DSC_BPC_8 = 8, - DSC_BPC_10 = 10, - DSC_BPC_12 = 12, - DSC_BPC_UNKNOWN -}; /* API functions (external or via structure->function_pointer) */ diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index b099989d9364..942d9f0b6df2 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -411,6 +411,20 @@ enum dc_irq_source dal_irq_get_rx_source( } } +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq) +{ + enum gpio_id id = dal_gpio_get_id(irq); + + switch (id) { + case GPIO_ID_HPD: + return (enum dc_irq_source)(DC_IRQ_SOURCE_DCI2C_RR_DDC1 + + dal_gpio_get_enum(irq)); + default: + return DC_IRQ_SOURCE_INVALID; + } +} + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config) diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c index 2546224b326a..e4496ad203b2 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c @@ -132,9 +132,9 @@ int hubbub21_init_dchub(struct hubbub *hubbub, // Init VMID 0 based on PA config dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config); } - - dcn21_dchvm_init(hubbub); - + if (!hubbub1->base.ctx->dc->config.skip_riommu_prefetch_wa) { + dcn21_dchvm_init(hubbub); + } return hubbub1->num_vmid; } diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index 5ed195377a6c..baed31611477 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -1032,7 +1032,7 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected, .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done, - .hubp_clear_tiling = hubp2_clear_tiling, + .hubp_clear_tiling = hubp401_clear_tiling, }; bool hubp401_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/Makefile b/drivers/gpu/drm/amd/display/dc/hwss/Makefile index 40ecebea1ba0..bee617ca0838 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hwss/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +HWSS_DCE60 = dce60_hwseq.o + +AMD_DAL_HWSS_DCE60 = $(addprefix $(AMDDALPATH)/dc/hwss/dce60/,$(HWSS_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE60) +endif + +############################################################################### + +HWSS_DCE80 = dce80_hwseq.o + +AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) + +############################################################################### + HWSS_DCE = dce_hwseq.o AMD_DAL_HWSS_DCE = $(addprefix $(AMDDALPATH)/dc/hwss/dce/,$(HWSS_DCE)) @@ -65,14 +83,6 @@ AMD_DAL_HWSS_DCE120 = $(addprefix $(AMDDALPATH)/dc/hwss/dce120/,$(HWSS_DCE120)) AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE120) -############################################################################### - -HWSS_DCE80 = dce80_hwseq.o - -AMD_DAL_HWSS_DCE80 = $(addprefix $(AMDDALPATH)/dc/hwss/dce80/,$(HWSS_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_HWSS_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5656d10368ad..23bec5d25ed6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -2763,12 +2763,12 @@ static void dce110_enable_per_frame_crtc_position_reset( } -static void init_pipes(struct dc *dc, struct dc_state *context) +static void dce110_init_pipes(struct dc *dc, struct dc_state *context) { // Do nothing } -static void init_hw(struct dc *dc) +static void dce110_init_hw(struct dc *dc) { int i; struct dc_bios *bp; @@ -3327,7 +3327,7 @@ void dce110_disable_link_output(struct dc_link *link, static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, - .init_hw = init_hw, + .init_hw = dce110_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, .post_unlock_program_front_end = dce110_post_unlock_program_front_end, @@ -3371,7 +3371,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { }; static const struct hwseq_private_funcs dce110_private_funcs = { - .init_pipes = init_pipes, + .init_pipes = dce110_init_pipes, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c rename to drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c index 44b56490e152..a08e9f9eec17 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c @@ -26,7 +26,7 @@ #include "dm_services.h" #include "dc.h" #include "core_types.h" -#include "dce60_hw_sequencer.h" +#include "dce60_hwseq.h" #include "dce/dce_hwseq.h" #include "dce110/dce110_hwseq.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_hw_sequencer.h rename to drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 912f96323ed6..f9ee55998b6b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -94,6 +94,128 @@ static void print_microsec(struct dc_context *dc_ctx, us_x10 % frac); } +/* + * Delay until we passed busy-until-point to which we can + * do necessary locking/programming on consecutive full updates + */ +void dcn10_wait_for_pipe_update_if_needed(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only) +{ + struct crtc_position position; + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int vpos, frame_count; + uint32_t vupdate_start, vupdate_end, vblank_start; + unsigned int lines_to_vupdate, us_to_vupdate; + unsigned int us_per_line, us_vupdate; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + if (pipe_ctx->prev_odm_pipe && + pipe_ctx->stream) + return; + + if (!pipe_ctx->wait_is_required) + return; + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + frame_count = tg->funcs->get_frame_count(tg); + + if (frame_count - pipe_ctx->wait_frame_count > 2) + return; + + vblank_start = pipe_ctx->pipe_dlg_param.vblank_start; + + if (vpos >= vupdate_start && vupdate_start >= vblank_start) + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; + else + lines_to_vupdate = vupdate_start - vpos; + + us_per_line = + stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; + us_to_vupdate = lines_to_vupdate * us_per_line; + + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + + us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; + + if (is_surface_update_only && us_to_vupdate + us_vupdate > 200) { + //surface updates come in at high irql + pipe_ctx->wait_is_required = true; + return; + } + + fsleep(us_to_vupdate + us_vupdate); + + //clear + pipe_ctx->next_vupdate = 0; + pipe_ctx->wait_frame_count = 0; + pipe_ctx->wait_is_required = false; +} + +/* + * On pipe unlock and programming, indicate pipe will be busy + * until some frame and line (vupdate), this is required for consecutive + * full updates, need to wait for updates + * to latch to try and program the next update + */ +void dcn10_set_wait_for_update_needed_for_pipe(struct dc *dc, struct pipe_ctx *pipe_ctx) +{ + uint32_t vupdate_start, vupdate_end; + struct crtc_position position; + unsigned int vpos, cur_frame; + + if (!pipe_ctx->stream || + !pipe_ctx->stream_res.tg || + !pipe_ctx->stream_res.stream_enc) + return; + + dc->hwss.get_position(&pipe_ctx, 1, &position); + vpos = position.vertical_count; + + dc->hwss.calc_vupdate_position(dc, pipe_ctx, &vupdate_start, + &vupdate_end); + + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + struct optc *optc1 = DCN10TG_FROM_TG(tg); + + ASSERT(optc1->max_frame_count != 0); + + if (tg->funcs->is_tg_enabled && !tg->funcs->is_tg_enabled(tg)) + return; + + pipe_ctx->next_vupdate = vupdate_start; + + cur_frame = tg->funcs->get_frame_count(tg); + + if (vpos < vupdate_start) { + pipe_ctx->wait_frame_count = cur_frame; + } else { + if (cur_frame + 1 > optc1->max_frame_count) + pipe_ctx->wait_frame_count = cur_frame + 1 - optc1->max_frame_count; + else + pipe_ctx->wait_frame_count = cur_frame + 1; + } + + pipe_ctx->wait_is_required = true; +} + void dcn10_lock_all_pipes(struct dc *dc, struct dc_state *context, bool lock) @@ -2664,7 +2786,6 @@ void dcn10_update_visual_confirm_color(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; if (mpc->funcs->set_bg_color) { - memcpy(&pipe_ctx->plane_state->visual_confirm_color, &(pipe_ctx->visual_confirm_color), sizeof(struct tg_color)); mpc->funcs->set_bg_color(mpc, &(pipe_ctx->visual_confirm_color), mpcc_id); } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h index 42ffd1e1299c..57d30ea225f2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h @@ -50,6 +50,13 @@ void dcn10_optimize_bandwidth( void dcn10_prepare_bandwidth( struct dc *dc, struct dc_state *context); +void dcn10_wait_for_pipe_update_if_needed( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + bool is_surface_update_only); +void dcn10_set_wait_for_update_needed_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx); void dcn10_pipe_control_lock( struct dc *dc, struct pipe_ctx *pipe, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 846c9c51f2d9..858288c3b1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2053,7 +2053,7 @@ void dcn20_program_front_end_for_ctx( for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) { + if (pipe->plane_state) { ASSERT(!pipe->plane_state->triplebuffer_flips); /*turn off triple buffer for full update*/ dc->hwss.program_triplebuffer( @@ -2482,7 +2482,7 @@ bool dcn20_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) return false; /* apply updated bandwidth parameters */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index be26c925fdfa..e68f21fd5f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -84,6 +84,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index cd0adf72b223..a0b05b9ef660 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1181,6 +1181,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; bool two_pix_per_container = false; + struct dce_hwseq *hws = stream->ctx->dc->hwseq; two_pix_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); @@ -1201,7 +1202,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign } else { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_4; - if ((odm_combine_factor == 2) || dcn32_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + if ((odm_combine_factor == 2) || (hws->funcs.is_dp_dig_pixel_rate_div_policy && + hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx))) *k2_div = PIXEL_RATE_DIV_BY_2; } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 922b8d71cf1a..c814d957305a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -241,11 +241,6 @@ void dcn35_init_hw(struct dc *dc) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } - if (res_pool->dccg->funcs->dccg_root_gate_disable_control) { - for (i = 0; i < res_pool->pipe_count; i++) - res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0); - } - for (i = 0; i < res_pool->audio_count; i++) { struct audio *audio = res_pool->audios[i]; @@ -901,12 +896,18 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) { + struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dccg *dccg = dc->res_pool->dccg; + + /* enable DCFCLK current DCHUB */ pipe_ctx->plane_res.hubp->funcs->hubp_clk_cntl(pipe_ctx->plane_res.hubp, true); /* initialize HUBP on power up */ pipe_ctx->plane_res.hubp->funcs->hubp_init(pipe_ctx->plane_res.hubp); - + /*make sure DPPCLK is on*/ + dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, true); + dpp->funcs->dpp_dppclk_control(dpp, false, true); /* make sure OPP_PIPE_CLOCK_EN = 1 */ pipe_ctx->stream_res.opp->funcs->opp_pipe_clock_control( pipe_ctx->stream_res.opp, @@ -923,6 +924,7 @@ void dcn35_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx, // Program system aperture settings pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt); } + //DC_LOG_DEBUG("%s: dpp_inst(%d) =\n", __func__, dpp->inst); if (!pipe_ctx->top_pipe && pipe_ctx->plane_state @@ -938,6 +940,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; struct dpp *dpp = pipe_ctx->plane_res.dpp; + struct dccg *dccg = dc->res_pool->dccg; + dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); @@ -955,7 +959,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) hubp->funcs->hubp_clk_cntl(hubp, false); dpp->funcs->dpp_dppclk_control(dpp, false, false); -/*to do, need to support both case*/ + dccg->funcs->dccg_root_gate_disable_control(dccg, dpp->inst, false); + hubp->power_gated = true; hubp->funcs->hubp_reset(hubp); @@ -967,6 +972,8 @@ void dcn35_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->top_pipe = NULL; pipe_ctx->bottom_pipe = NULL; pipe_ctx->plane_state = NULL; + //DC_LOG_DEBUG("%s: dpp_inst(%d)=\n", __func__, dpp->inst); + } void dcn35_disable_plane(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx) @@ -1543,7 +1550,7 @@ static bool should_avoid_empty_tu(struct pipe_ctx *pipe_ctx) struct dc_link_settings *link_settings = &pipe_ctx->link_config.dp_link_settings; const struct dc *dc = pipe_ctx->stream->link->dc; - if (pipe_ctx->stream->link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + if (pipe_ctx->link_config.dp_tunnel_settings.should_enable_dp_tunneling == false) return false; // Not necessary for MST configurations diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 6a82a865209c..a3ccf805bd16 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -168,6 +168,8 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn35_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 902a96940a01..58f2be2a326b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -158,10 +158,12 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .set_mcm_luts = dcn32_set_mcm_luts, .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, - .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, + .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, + .set_wait_for_update_needed_for_pipe = dcn10_set_wait_for_update_needed_for_pipe, }; void dcn351_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 3af6a3402b89..c4177a9a662f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -396,6 +396,249 @@ static void dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ct } } +static void dcn401_set_mcm_location_post_blend(struct dc *dc, struct pipe_ctx *pipe_ctx, bool bPostBlend) +{ + struct mpc *mpc = dc->res_pool->mpc; + int mpcc_id = pipe_ctx->plane_res.hubp->inst; + + if (!pipe_ctx->plane_state) + return; + + mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); + pipe_ctx->plane_state->mcm_location = (bPostBlend) ? + MPCC_MOVABLE_CM_LOCATION_AFTER : + MPCC_MOVABLE_CM_LOCATION_BEFORE; +} + +static void dc_get_lut_mode( + enum dc_cm2_gpu_mem_layout layout, + enum hubp_3dlut_fl_mode *mode, + enum hubp_3dlut_fl_addressing_mode *addr_mode) +{ + switch (layout) { + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: + *mode = hubp_3dlut_fl_mode_native_1; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: + *mode = hubp_3dlut_fl_mode_native_2; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: + *mode = hubp_3dlut_fl_mode_transform; + *addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear; + break; + default: + *mode = hubp_3dlut_fl_mode_disable; + *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; + break; + } +} + +static void dc_get_lut_format( + enum dc_cm2_gpu_mem_format dc_format, + enum hubp_3dlut_fl_format *format) +{ + switch (dc_format) { + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: + *format = hubp_3dlut_fl_format_unorm_12msb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: + *format = hubp_3dlut_fl_format_unorm_12lsb_bitslice; + break; + case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10: + *format = hubp_3dlut_fl_format_float_fp1_5_10; + break; + } +} + +static void dc_get_lut_xbar( + enum dc_cm2_gpu_mem_pixel_component_order order, + enum hubp_3dlut_fl_crossbar_bit_slice *cr_r, + enum hubp_3dlut_fl_crossbar_bit_slice *y_g, + enum hubp_3dlut_fl_crossbar_bit_slice *cb_b) +{ + switch (order) { + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_32_47; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_0_15; + break; + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA: + *cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; + *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + *cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; + break; + } +} + +static void dc_get_lut_width( + enum dc_cm2_gpu_mem_size size, + enum hubp_3dlut_fl_width *width) +{ + switch (size) { + case DC_CM2_GPU_MEM_SIZE_333333: + *width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + *width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + *width = hubp_3dlut_fl_width_transformed; + break; + } +} +static bool dc_is_rmcm_3dlut_supported(struct hubp *hubp, struct mpc *mpc) +{ + if (mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.program_lut_read_write_control && + hubp->funcs->hubp_program_3dlut_fl_addr && + mpc->funcs->rmcm.program_bit_depth && + hubp->funcs->hubp_program_3dlut_fl_mode && + hubp->funcs->hubp_program_3dlut_fl_addressing_mode && + hubp->funcs->hubp_program_3dlut_fl_format && + hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->rmcm.program_bias_scale && + hubp->funcs->hubp_program_3dlut_fl_crossbar && + hubp->funcs->hubp_program_3dlut_fl_width && + mpc->funcs->rmcm.update_3dlut_fast_load_select && + mpc->funcs->rmcm.populate_lut && + mpc->funcs->rmcm.program_lut_mode && + hubp->funcs->hubp_enable_3dlut_fl && + mpc->funcs->rmcm.enable_3dlut_fl) + return true; + + return false; +} + +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id) +{ + struct dpp *dpp_base = pipe_ctx->plane_res.dpp; + union mcm_lut_params m_lut_params; + enum MCM_LUT_XABLE shaper_xable, lut3d_xable = MCM_LUT_DISABLE, lut1d_xable; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum hubp_3dlut_fl_format format = 0; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g = 0; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b = 0; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r = 0; + enum hubp_3dlut_fl_width width = 0; + struct dc *dc = hubp->ctx->dc; + + bool bypass_rmcm_3dlut = false; + bool bypass_rmcm_shaper = false; + + dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + + /* 3DLUT */ + switch (lut3d_src) { + case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + memset(&m_lut_params, 0, sizeof(m_lut_params)); + // Don't know what to do in this case. + //case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: + break; + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + dc_get_lut_width(mcm_luts->lut3d_data.gpu_mem_params.size, &width); + if (!dc_is_rmcm_3dlut_supported(hubp, mpc) || + !mpc->funcs->rmcm.is_config_supported(width)) + return false; + + //0. disable fl on mpc + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, 0xF); + + //1. power down the block + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, false); + + //2. program RMCM + //2a. 3dlut reg programming + mpc->funcs->rmcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, + (!bypass_rmcm_3dlut) && lut3d_xable != MCM_LUT_DISABLE, mpcc_id); + + hubp->funcs->hubp_program_3dlut_fl_addr(hubp, + mcm_luts->lut3d_data.gpu_mem_params.addr); + + mpc->funcs->rmcm.program_bit_depth(mpc, + mcm_luts->lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + + // setting native or transformed mode, + dc_get_lut_mode(mcm_luts->lut3d_data.gpu_mem_params.layout, &mode, &addr_mode); + + //these program the mcm 3dlut + hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode); + + hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode); + + //seems to be only for the MCM + dc_get_lut_format(mcm_luts->lut3d_data.gpu_mem_params.format_params.format, &format); + hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); + + mpc->funcs->rmcm.program_bias_scale(mpc, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); + hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale); + + dc_get_lut_xbar( + mcm_luts->lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + + hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, + crossbar_bit_slice_y_g, + crossbar_bit_slice_cb_b); + + mpc->funcs->rmcm.program_3dlut_size(mpc, width, mpcc_id); + + mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); + + //2b. shaper reg programming + memset(&m_lut_params, 0, sizeof(m_lut_params)); + + if (mcm_luts->shaper->type == TF_TYPE_HWPWL) { + m_lut_params.pwl = &mcm_luts->shaper->pwl; + } else if (mcm_luts->shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { + ASSERT(false); + cm_helper_translate_curve_to_hw_format( + dc->ctx, + mcm_luts->shaper, + &dpp_base->regamma_params, true); + m_lut_params.pwl = &dpp_base->regamma_params; + } + if (m_lut_params.pwl) { + mpc->funcs->rmcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + mpc->funcs->rmcm.program_lut_mode(mpc, !bypass_rmcm_shaper, lut_bank_a, mpcc_id); + } else { + //RMCM 3dlut won't work without its shaper + return false; + } + + //3. Select the hubp connected to this RMCM + hubp->funcs->hubp_enable_3dlut_fl(hubp, true); + mpc->funcs->rmcm.enable_3dlut_fl(mpc, true, mpcc_id); + + //4. power on the block + if (m_lut_params.pwl) + mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, true); + + break; + default: + return false; + } + + return true; +} + void dcn401_populate_mcm_luts(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_cm2_func_luts mcm_luts, @@ -407,21 +650,39 @@ void dcn401_populate_mcm_luts(struct dc *dc, struct mpc *mpc = dc->res_pool->mpc; union mcm_lut_params m_lut_params; enum dc_cm2_transfer_func_source lut3d_src = mcm_luts.lut3d_data.lut3d_src; - enum hubp_3dlut_fl_format format; + enum hubp_3dlut_fl_format format = 0; enum hubp_3dlut_fl_mode mode; - enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_width width = 0; enum hubp_3dlut_fl_addressing_mode addr_mode; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g = 0; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b = 0; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r = 0; enum MCM_LUT_XABLE shaper_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; - bool is_17x17x17 = true; bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); + //MCM - setting its location (Before/After) blender + //set to post blend (true) + dcn401_set_mcm_location_post_blend( + dc, + pipe_ctx, + mcm_luts.lut3d_data.mpc_mcm_post_blend); + + //RMCM - 3dLUT+Shaper + if (mcm_luts.lut3d_data.rmcm_3dlut_enable) { + dcn401_program_rmcm_luts( + hubp, + pipe_ctx, + lut3d_src, + &mcm_luts, + mpc, + lut_bank_a, + mpcc_id); + } + /* 1D LUT */ if (mcm_luts.lut1d_func) { memset(&m_lut_params, 0, sizeof(m_lut_params)); @@ -442,7 +703,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, } /* Shaper */ - if (mcm_luts.shaper) { + if (mcm_luts.shaper && mcm_luts.lut3d_data.mpc_3dlut_enable) { memset(&m_lut_params, 0, sizeof(m_lut_params)); if (mcm_luts.shaper->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.shaper->pwl; @@ -454,11 +715,11 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { - if (mpc->funcs->populate_lut) - mpc->funcs->populate_lut(mpc, MCM_LUT_SHAPER, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->mcm.populate_lut) + mpc->funcs->mcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); + if (mpc->funcs->program_lut_mode) + mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, MCM_LUT_ENABLE, lut_bank_a, mpcc_id); } - if (mpc->funcs->program_lut_mode) - mpc->funcs->program_lut_mode(mpc, MCM_LUT_SHAPER, shaper_xable, lut_bank_a, mpcc_id); } /* 3DLUT */ @@ -467,6 +728,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, memset(&m_lut_params, 0, sizeof(m_lut_params)); if (hubp->funcs->hubp_enable_3dlut_fl) hubp->funcs->hubp_enable_3dlut_fl(hubp, false); + if (mcm_luts.lut3d_data.lut3d_func && mcm_luts.lut3d_data.lut3d_func->state.bits.initialized) { m_lut_params.lut3d = &mcm_luts.lut3d_data.lut3d_func->lut_3d; if (mpc->funcs->populate_lut) @@ -476,16 +738,35 @@ void dcn401_populate_mcm_luts(struct dc *dc, mpcc_id); } break; - case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: + switch (mcm_luts.lut3d_data.gpu_mem_params.size) { + case DC_CM2_GPU_MEM_SIZE_333333: + width = hubp_3dlut_fl_width_33; + break; + case DC_CM2_GPU_MEM_SIZE_171717: + width = hubp_3dlut_fl_width_17; + break; + case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + width = hubp_3dlut_fl_width_transformed; + break; + } + + //check for support + if (mpc->funcs->mcm.is_config_supported && + !mpc->funcs->mcm.is_config_supported(width)) + break; if (mpc->funcs->program_lut_read_write_control) mpc->funcs->program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, mpcc_id); if (mpc->funcs->program_lut_mode) mpc->funcs->program_lut_mode(mpc, MCM_LUT_3DLUT, lut3d_xable, lut_bank_a, mpcc_id); - if (mpc->funcs->program_3dlut_size) - mpc->funcs->program_3dlut_size(mpc, is_17x17x17, mpcc_id); + if (hubp->funcs->hubp_program_3dlut_fl_addr) hubp->funcs->hubp_program_3dlut_fl_addr(hubp, mcm_luts.lut3d_data.gpu_mem_params.addr); + + if (mpc->funcs->mcm.program_bit_depth) + mpc->funcs->mcm.program_bit_depth(mpc, mcm_luts.lut3d_data.gpu_mem_params.bit_depth, mpcc_id); + switch (mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: mode = hubp_3dlut_fl_mode_native_1; @@ -512,7 +793,6 @@ void dcn401_populate_mcm_luts(struct dc *dc, switch (mcm_luts.lut3d_data.gpu_mem_params.format_params.format) { case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: - default: format = hubp_3dlut_fl_format_unorm_12msb_bitslice; break; case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: @@ -524,37 +804,37 @@ void dcn401_populate_mcm_luts(struct dc *dc, } if (hubp->funcs->hubp_program_3dlut_fl_format) hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); - if (hubp->funcs->hubp_update_3dlut_fl_bias_scale) + if (hubp->funcs->hubp_update_3dlut_fl_bias_scale && + mpc->funcs->mcm.program_bias_scale) { + mpc->funcs->mcm.program_bias_scale(mpc, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale, + mpcc_id); hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, - mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); - - switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) { - case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: - default: - crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; - crossbar_bit_slice_y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; - crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; - break; + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.bias, + mcm_luts.lut3d_data.gpu_mem_params.format_params.float_params.scale); } + //navi 4x has a bug and r and blue are swapped and need to be worked around here in + //TODO: need to make a method for get_xbar per asic OR do the workaround in program_crossbar for 4x + dc_get_lut_xbar( + mcm_luts.lut3d_data.gpu_mem_params.component_order, + &crossbar_bit_slice_cr_r, + &crossbar_bit_slice_y_g, + &crossbar_bit_slice_cb_b); + if (hubp->funcs->hubp_program_3dlut_fl_crossbar) hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, + crossbar_bit_slice_cr_r, crossbar_bit_slice_y_g, - crossbar_bit_slice_cb_b, - crossbar_bit_slice_cr_r); + crossbar_bit_slice_cb_b); + + if (mpc->funcs->mcm.program_lut_read_write_control) + mpc->funcs->mcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, true, mpcc_id); + + if (mpc->funcs->mcm.program_3dlut_size) + mpc->funcs->mcm.program_3dlut_size(mpc, width, mpcc_id); - switch (mcm_luts.lut3d_data.gpu_mem_params.size) { - case DC_CM2_GPU_MEM_SIZE_171717: - default: - width = hubp_3dlut_fl_width_17; - break; - case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: - width = hubp_3dlut_fl_width_transformed; - break; - } - if (hubp->funcs->hubp_program_3dlut_fl_width) - hubp->funcs->hubp_program_3dlut_fl_width(hubp, width); if (mpc->funcs->update_3dlut_fast_load_select) mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); @@ -2081,7 +2361,7 @@ void dcn401_program_front_end_for_ctx( for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) { + if (pipe->plane_state) { if (pipe->plane_state->triplebuffer_flips) BREAK_TO_DEBUGGER(); @@ -2371,7 +2651,7 @@ bool dcn401_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) + if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) return false; /* apply updated bandwidth parameters */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 781cf0efccc6..ce65b4f6c672 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,4 +109,12 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +bool dcn401_program_rmcm_luts( + struct hubp *hubp, + struct pipe_ctx *pipe_ctx, + enum dc_cm2_transfer_func_source lut3d_src, + struct dc_cm2_func_luts *mcm_luts, + struct mpc *mpc, + bool lut_bank_a, + int mpcc_id); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index c8b5ed834579..3a0795045bc6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -195,6 +195,8 @@ enum block_sequence_func { DMUB_SUBVP_SAVE_SURF_ADDR, HUBP_WAIT_FOR_DCC_META_PROP, DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST, + /* This must be the last value in this enum, add new ones above */ + HWSS_BLOCK_SEQUENCE_FUNC_COUNT }; struct block_sequence { @@ -202,6 +204,8 @@ struct block_sequence { enum block_sequence_func func; }; +#define MAX_HWSS_BLOCK_SEQUENCE_SIZE (HWSS_BLOCK_SEQUENCE_FUNC_COUNT * MAX_PIPES) + struct hw_sequencer_funcs { void (*hardware_release)(struct dc *dc); /* Embedded Display Related */ @@ -534,13 +538,13 @@ void set_drr_and_clear_adjust_pending( struct drr_params *params); void hwss_execute_sequence(struct dc *dc, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], int num_steps); void hwss_build_fast_sequence(struct dc *dc, struct dc_dmub_cmd *dc_dmub_cmd, unsigned int dmub_cmd_count, - struct block_sequence block_sequence[], + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE], unsigned int *num_steps, struct pipe_ctx *pipe_ctx, struct dc_stream_status *stream_status, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 22a5d4a03c98..09bc65c2fa23 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -183,6 +183,8 @@ struct hwseq_private_funcs { struct dc_cm2_func_luts mcm_luts, bool lut_bank_a); void (*perform_3dlut_wa_unlock)(struct pipe_ctx *pipe_ctx); + void (*wait_for_pipe_update_if_needed)(struct dc *dc, struct pipe_ctx *pipe_ctx, bool is_surface_update_only); + void (*set_wait_for_update_needed_for_pipe)(struct dc *dc, struct pipe_ctx *pipe_ctx); }; struct dce_hwseq { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index b5afd8c3103d..f3696143590c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -26,6 +26,8 @@ #ifndef _CORE_STATUS_H_ #define _CORE_STATUS_H_ +#include "dc_hw_types.h" + enum dc_status { DC_OK = 1, @@ -56,6 +58,7 @@ enum dc_status { DC_NO_LINK_ENC_RESOURCE = 26, DC_FAIL_DP_PAYLOAD_ALLOCATION = 27, DC_FAIL_DP_LINK_BANDWIDTH = 28, + DC_FAIL_HW_CURSOR_SUPPORT = 29, DC_ERROR_UNEXPECTED = -1 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d0021f25f3d8..0cf349cafb3e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -65,6 +65,7 @@ struct resource_pool; struct dc_state; struct resource_context; struct clk_bw_params; +struct dc_mcache_params; struct resource_funcs { enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index); @@ -78,8 +79,7 @@ struct resource_funcs { /* Create a minimal link encoder object with no dc_link object * associated with it. */ struct link_encoder *(*link_enc_create_minimal)(struct dc_context *ctx, enum engine_id eng_id); - - bool (*validate_bandwidth)( + enum dc_status (*validate_bandwidth)( struct dc *dc, struct dc_state *context, bool fast_validate); @@ -218,6 +218,11 @@ struct resource_funcs { int (*get_power_profile)(const struct dc_state *context); unsigned int (*get_det_buffer_size)(const struct dc_state *context); unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx); + unsigned int (*get_max_hw_cursor_size)(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream); + bool (*program_mcache_pipe_config)(struct dc_state *context, + const struct dc_mcache_params *mcache_params); }; struct audio_support{ @@ -382,7 +387,9 @@ struct link_resource { struct link_config { struct dc_link_settings dp_link_settings; + struct dc_tunnel_settings dp_tunnel_settings; }; + union pipe_update_flags { struct { uint32_t enable : 1; @@ -480,6 +487,10 @@ struct pipe_ctx { struct pixel_rate_divider pixel_rate_divider; /* pixels borrowed from hblank to hactive */ uint8_t hblank_borrow; + /* next vupdate */ + uint32_t next_vupdate; + uint32_t wait_frame_count; + bool wait_is_required; }; /* Data used for dynamic link encoder assignment. @@ -507,7 +518,7 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; - /* solely used for build scalar data in dml2 */ + /* used to build scalar data in dml2 and for edp backlight programming */ struct pipe_ctx temp_pipe; }; @@ -630,7 +641,7 @@ struct dc_state { */ struct bw_context bw_ctx; - struct block_sequence block_sequence[100]; + struct block_sequence block_sequence[MAX_HWSS_BLOCK_SEQUENCE_SIZE]; unsigned int block_sequence_steps; struct dc_dmub_cmd dc_dmub_cmd[10]; unsigned int dmub_cmd_count; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 221645c023b5..bac8febad69a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -199,6 +199,7 @@ enum dentist_divider_range { CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ CLK_SR_DCN35(CLK5_spll_field_8), \ + CLK_SR_DCN35(CLK6_spll_field_8), \ SR(DENTIST_DISPCLK_CNTL), \ #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ @@ -307,7 +308,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK4_ALLOW_DS; uint32_t CLK1_CLK5_ALLOW_DS; uint32_t CLK5_spll_field_8; - + uint32_t CLK6_spll_field_8; }; struct clk_mgr_shift { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 3a89cc0cffc1..6e303b81bfb0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -967,23 +967,6 @@ struct mpc_funcs { */ void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); - /** - * @get_3dlut_fast_load_status: - * - * Get 3D LUT fast load status and reference them with done, soft_underflow and hard_underflow pointers. - * - * Parameters: - * - [in/out] mpc - MPC context. - * - [in] mpcc_id - * - [in/out] done - * - [in/out] soft_underflow - * - [in/out] hard_underflow - * - * Return: - * - * void - */ - void (*get_3dlut_fast_load_status)(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow); /** * @populate_lut: @@ -1054,6 +1037,35 @@ struct mpc_funcs { * void */ void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); + + struct { + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } mcm; + + struct { + void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id); + void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); + void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, + bool lut_bank_a, bool enabled, int mpcc_id); + void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_XABLE xable, + bool lut_bank_a, int mpcc_id); + void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); + void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); + void (*program_bit_depth)(struct mpc *mpc, uint16_t bit_depth, int mpcc_id); + bool (*is_config_supported)(uint32_t width); + + void (*power_on_shaper_3dlut)(struct mpc *mpc, uint32_t mpcc_id, bool power_on); + void (*populate_lut)(struct mpc *mpc, const union mcm_lut_params params, + bool lut_bank_a, int mpcc_id); + } rmcm; }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 7f371cbb35cd..0d5a8358a778 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -68,6 +68,7 @@ struct optc { int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; + uint32_t max_frame_count; }; void optc1_read_otg_state(struct timing_generator *optc, struct dcn_otg_state *s); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index 2948a696ee12..7d16351bba99 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -207,6 +207,9 @@ struct link_service { bool (*dp_decide_link_settings)( struct dc_stream_state *stream, struct dc_link_settings *link_setting); + void (*dp_decide_tunnel_settings)( + struct dc_stream_state *stream, + struct dc_tunnel_settings *dp_tunnel_setting); enum dp_link_encoding (*mst_decide_link_encoding_format)( const struct dc_link *link); bool (*edp_decide_link_settings)(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index a402df225a76..26cb1459b743 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -508,6 +508,10 @@ uint32_t generic_indirect_reg_update_ex(const struct dc_context *ctx, initial_val, \ n, __VA_ARGS__) +#define IX_REG_SET_SYNC(index, init_value, f1, v1) \ + IX_REG_SET_N_SYNC(index, 1, init_value, \ + FN(reg, f1), v1) + #define IX_REG_SET_2_SYNC(index, init_value, f1, v1, f2, v2) \ IX_REG_SET_N_SYNC(index, 2, init_value, \ FN(reg, f1), v1,\ diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 9458187b834d..a890f581f4e8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -32,6 +32,7 @@ #define MEMORY_TYPE_MULTIPLIER_CZ 4 #define MEMORY_TYPE_HBM 2 +#define MAX_MCACHES 8 #define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0) @@ -65,6 +66,13 @@ struct resource_straps { uint32_t audio_stream_number; }; +struct dc_mcache_allocations { + int global_mcache_ids_plane0[MAX_MCACHES + 1]; + int global_mcache_ids_plane1[MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane0[MAX_MCACHES + 1]; + int global_mcache_ids_mall_plane1[MAX_MCACHES + 1]; +}; + struct resource_create_funcs { void (*read_dce_straps)( struct dc_context *ctx, struct resource_straps *straps); @@ -628,8 +636,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx); -bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream); - /* Get hw programming parameters container from pipe context * @pipe_ctx: pipe context * @dscl_prog_data: struct to hold programmable hw reg values diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c index 953f4a4dacad..33ce470e4c88 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c @@ -37,36 +37,9 @@ #include "ivsrcid/ivsrcid_vislands30.h" -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c b/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c index 2c72074310c7..d777b85e70da 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c @@ -46,36 +46,9 @@ #include "dc_types.h" -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - DC_HPD1_INT_STATUS, - DC_HPD1_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - DC_HPD1_INT_CONTROL, - DC_HPD1_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd1_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { @@ -391,5 +364,3 @@ struct irq_service *dal_irq_service_dce60_create( dce60_irq_construct(irq_service, init_data); return irq_service; } - - diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c index 49317934ef4f..3a9163acb49b 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c @@ -37,36 +37,9 @@ #include "dc_types.h" -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - DC_HPD1_INT_STATUS, - DC_HPD1_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - DC_HPD1_INT_CONTROL, - DC_HPD1_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd1_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { @@ -303,5 +276,3 @@ struct irq_service *dal_irq_service_dce80_create( dce80_irq_construct(irq_service, init_data); return irq_service; } - - diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c index 9ca28565a9d1..4ce9edd16344 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c @@ -129,36 +129,9 @@ static enum dc_irq_source to_dal_irq_source_dcn10(struct irq_service *irq_servic } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c index 916f0c974637..5847af0e66cb 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c @@ -130,36 +130,9 @@ static enum dc_irq_source to_dal_irq_source_dcn20( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index 1d61d475d36f..6417011d2246 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -80,36 +80,9 @@ static enum dc_irq_source to_dal_irq_source_dcn201( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 42cdfe6c3538..71d2f065140b 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -132,36 +132,9 @@ static enum dc_irq_source to_dal_irq_source_dcn21(struct irq_service *irq_servic return DC_IRQ_SOURCE_INVALID; } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index a443a8abb1ea..2a4080bdcf6b 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -139,36 +139,9 @@ static enum dc_irq_source to_dal_irq_source_dcn30( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { @@ -447,4 +420,3 @@ struct irq_service *dal_irq_service_dcn30_create( dcn30_irq_construct(irq_service, init_data); return irq_service; } - diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c b/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c index 8ffc7e2c681a..624f1ac309f8 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c @@ -126,26 +126,9 @@ static enum dc_irq_source to_dal_irq_source_dcn302(struct irq_service *irq_servi } } -static bool hpd_ack(struct irq_service *irq_service, const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = get_reg_field_value(value, HPD0_DC_HPD_INT_STATUS, DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value(value, current_status ? 0 : 1, HPD0_DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c index 262bb8b74b15..137caffae916 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c @@ -77,26 +77,9 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi } } -static bool hpd_ack(struct irq_service *irq_service, const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = get_reg_field_value(value, HPD0_DC_HPD_INT_STATUS, DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value(value, current_status ? 0 : 1, HPD0_DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c index 53e78ae7eecf..921cb167d920 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c @@ -128,36 +128,9 @@ static enum dc_irq_source to_dal_irq_source_dcn31(struct irq_service *irq_servic } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c b/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c index e0563e880432..0118fd6e5db0 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c @@ -130,36 +130,9 @@ static enum dc_irq_source to_dal_irq_source_dcn314(struct irq_service *irq_servi } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c b/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c index 2ef22299101a..adebfc888618 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c @@ -135,36 +135,9 @@ static enum dc_irq_source to_dal_irq_source_dcn315( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c index f0ac0aeeac51..e9e315c75d76 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c @@ -129,36 +129,9 @@ static enum dc_irq_source to_dal_irq_source_dcn32( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { @@ -191,6 +164,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -259,6 +242,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -270,14 +260,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ -} - #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -285,6 +267,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -387,21 +383,29 @@ irq_source_info_dcn32[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), + vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3) }; static const struct irq_service_funcs irq_service_funcs_dcn32 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c index ea8c271171bc..79e5e8c137ca 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c @@ -127,36 +127,9 @@ static enum dc_irq_source to_dal_irq_source_dcn35( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c index 7ec8e0de2f01..163b8ee9ebf7 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c @@ -106,36 +106,9 @@ static enum dc_irq_source to_dal_irq_source_dcn351( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c index ea958628f8b8..f716ab0fd30e 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c @@ -105,36 +105,9 @@ static enum dc_irq_source to_dal_irq_source_dcn36( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c index b43c9524b0de..fd9bb1950c20 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c @@ -109,36 +109,9 @@ static enum dc_irq_source to_dal_irq_source_dcn401( } } -static bool hpd_ack( - struct irq_service *irq_service, - const struct irq_source_info *info) -{ - uint32_t addr = info->status_reg; - uint32_t value = dm_read_reg(irq_service->ctx, addr); - uint32_t current_status = - get_reg_field_value( - value, - HPD0_DC_HPD_INT_STATUS, - DC_HPD_SENSE_DELAYED); - - dal_irq_service_ack_generic(irq_service, info); - - value = dm_read_reg(irq_service->ctx, info->enable_reg); - - set_reg_field_value( - value, - current_status ? 0 : 1, - HPD0_DC_HPD_INT_CONTROL, - DC_HPD_INT_POLARITY); - - dm_write_reg(irq_service->ctx, info->enable_reg, value); - - return true; -} - static struct irq_source_info_funcs hpd_irq_info_funcs = { .set = NULL, - .ack = hpd_ack + .ack = hpd0_ack }; static struct irq_source_info_funcs hpd_rx_irq_info_funcs = { @@ -171,6 +144,16 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .ack = NULL }; +static struct irq_source_info_funcs vline1_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + +static struct irq_source_info_funcs vline2_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -239,6 +222,13 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } +#define vblank_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ + .funcs = &vblank_irq_info_funcs\ + } /* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic * of DCE's DC_IRQ_SOURCE_VUPDATEx. */ @@ -250,13 +240,6 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vupdate_no_lock_irq_info_funcs\ } -#define vblank_int_entry(reg_num)\ - [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ - IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VSTARTUP_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ - } #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -264,6 +247,20 @@ static struct irq_source_info_funcs vline0_irq_info_funcs = { OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ .funcs = &vline0_irq_info_funcs\ } +#define vline1_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT1_CONTROL, OTG_VERTICAL_INTERRUPT1_CLEAR),\ + .funcs = &vline1_irq_info_funcs\ + } +#define vline2_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE2 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_CLEAR),\ + .funcs = &vline2_irq_info_funcs\ + } #define dmub_outbox_int_entry()\ [DC_IRQ_SOURCE_DMCUB_OUTBOX] = {\ IRQ_REG_ENTRY_DMUB(\ @@ -364,21 +361,29 @@ irq_source_info_dcn401[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_no_lock_int_entry(0), - vupdate_no_lock_int_entry(1), - vupdate_no_lock_int_entry(2), - vupdate_no_lock_int_entry(3), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), vblank_int_entry(3), + [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), + [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), + dmub_outbox_int_entry(), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), vline0_int_entry(0), vline0_int_entry(1), vline0_int_entry(2), vline0_int_entry(3), - [DC_IRQ_SOURCE_DC5_VLINE1] = dummy_irq_entry(), - [DC_IRQ_SOURCE_DC6_VLINE1] = dummy_irq_entry(), - dmub_outbox_int_entry(), + vline1_int_entry(0), + vline1_int_entry(1), + vline1_int_entry(2), + vline1_int_entry(3), + vline2_int_entry(0), + vline2_int_entry(1), + vline2_int_entry(2), + vline2_int_entry(3), }; static const struct irq_service_funcs irq_service_funcs_dcn401 = { diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c index eca3d7ee7e4e..b595a11c5eaf 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c @@ -41,6 +41,16 @@ #include "reg_helper.h" #include "irq_service.h" +//HPD0_DC_HPD_INT_STATUS +#define HPD0_DC_HPD_INT_STATUS__DC_HPD_SENSE_DELAYED_MASK 0x00000010L +#define HPD0_DC_HPD_INT_CONTROL__DC_HPD_INT_POLARITY_MASK 0x00000100L +#define HPD0_DC_HPD_INT_STATUS__DC_HPD_SENSE_DELAYED__SHIFT 0x4 +#define HPD0_DC_HPD_INT_CONTROL__DC_HPD_INT_POLARITY__SHIFT 0x8 +//HPD1_DC_HPD_INT_STATUS +#define DC_HPD1_INT_STATUS__DC_HPD1_SENSE_DELAYED_MASK 0x10 +#define DC_HPD1_INT_STATUS__DC_HPD1_SENSE_DELAYED__SHIFT 0x4 +#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK 0x100 +#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY__SHIFT 0x8 #define CTX \ @@ -177,3 +187,57 @@ enum dc_irq_source dal_irq_service_to_irq_source( src_id, ext_id); } + +bool hpd0_ack( + struct irq_service *irq_service, + const struct irq_source_info *info) +{ + uint32_t addr = info->status_reg; + uint32_t value = dm_read_reg(irq_service->ctx, addr); + uint32_t current_status = + get_reg_field_value( + value, + HPD0_DC_HPD_INT_STATUS, + DC_HPD_SENSE_DELAYED); + + dal_irq_service_ack_generic(irq_service, info); + + value = dm_read_reg(irq_service->ctx, info->enable_reg); + + set_reg_field_value( + value, + current_status ? 0 : 1, + HPD0_DC_HPD_INT_CONTROL, + DC_HPD_INT_POLARITY); + + dm_write_reg(irq_service->ctx, info->enable_reg, value); + + return true; +} + +bool hpd1_ack( + struct irq_service *irq_service, + const struct irq_source_info *info) +{ + uint32_t addr = info->status_reg; + uint32_t value = dm_read_reg(irq_service->ctx, addr); + uint32_t current_status = + get_reg_field_value( + value, + DC_HPD1_INT_STATUS, + DC_HPD1_SENSE_DELAYED); + + dal_irq_service_ack_generic(irq_service, info); + + value = dm_read_reg(irq_service->ctx, info->enable_reg); + + set_reg_field_value( + value, + current_status ? 0 : 1, + DC_HPD1_INT_CONTROL, + DC_HPD1_INT_POLARITY); + + dm_write_reg(irq_service->ctx, info->enable_reg, value); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h index b178f85944cd..bbcef3d2fe33 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h @@ -82,4 +82,12 @@ void dal_irq_service_set_generic( const struct irq_source_info *info, bool enable); +bool hpd0_ack( + struct irq_service *irq_service, + const struct irq_source_info *info); + +bool hpd1_ack( + struct irq_service *irq_service, + const struct irq_source_info *info); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 110f656d43ae..a2f7b933bebf 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -161,6 +161,20 @@ enum dc_irq_source { DC_IRQ_SOURCE_DPCX_TX_PHYE, DC_IRQ_SOURCE_DPCX_TX_PHYF, + DC_IRQ_SOURCE_DC1_VLINE2, + DC_IRQ_SOURCE_DC2_VLINE2, + DC_IRQ_SOURCE_DC3_VLINE2, + DC_IRQ_SOURCE_DC4_VLINE2, + DC_IRQ_SOURCE_DC5_VLINE2, + DC_IRQ_SOURCE_DC6_VLINE2, + + DC_IRQ_SOURCE_DCI2C_RR_DDC1, + DC_IRQ_SOURCE_DCI2C_RR_DDC2, + DC_IRQ_SOURCE_DCI2C_RR_DDC3, + DC_IRQ_SOURCE_DCI2C_RR_DDC4, + DC_IRQ_SOURCE_DCI2C_RR_DDC5, + DC_IRQ_SOURCE_DCI2C_RR_DDC6, + DAL_IRQ_SOURCES_NUMBER }; @@ -170,6 +184,8 @@ enum irq_type IRQ_TYPE_VUPDATE = DC_IRQ_SOURCE_VUPDATE1, IRQ_TYPE_VBLANK = DC_IRQ_SOURCE_VBLANK1, IRQ_TYPE_VLINE0 = DC_IRQ_SOURCE_DC1_VLINE0, + IRQ_TYPE_VLINE1 = DC_IRQ_SOURCE_DC1_VLINE1, + IRQ_TYPE_VLINE2 = DC_IRQ_SOURCE_DC1_VLINE2, IRQ_TYPE_DCUNDERFLOW = DC_IRQ_SOURCE_DC1UNDERFLOW, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index cc9191a5c9e6..9655e6fa53a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -611,6 +611,7 @@ static bool detect_dp(struct dc_link *link, link->dpcd_caps.dongle_type = sink_caps->dongle_type; link->dpcd_caps.is_dongle_type_one = sink_caps->is_dongle_type_one; link->dpcd_caps.dpcd_rev.raw = 0; + link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw = 0; } return true; @@ -1007,21 +1008,11 @@ static bool detect_link_and_local_sink(struct dc_link *link, link->reported_link_cap.link_rate > LINK_RATE_HIGH3) link->reported_link_cap.link_rate = LINK_RATE_HIGH3; - /* - * If this is DP over USB4 link then we need to: - * - Enable BW ALLOC support on DPtx if applicable - */ - if (dc->config.usb4_bw_alloc_support) { - if (link_dp_dpia_set_dptx_usb4_bw_alloc_support(link)) { - /* update with non reduced link cap if bw allocation mode is supported */ - if (link->dpia_bw_alloc_config.nrd_max_link_rate && - link->dpia_bw_alloc_config.nrd_max_lane_count) { - link->reported_link_cap.link_rate = - link->dpia_bw_alloc_config.nrd_max_link_rate; - link->reported_link_cap.lane_count = - link->dpia_bw_alloc_config.nrd_max_lane_count; - } - } + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling + && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc + && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) { + if (link_dpia_enable_usb4_dp_bw_alloc_mode(link) == false) + link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc = false; } break; } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 53c961f86d43..273a3be6d593 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2374,7 +2374,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) update_psp_stream_config(pipe_ctx, true); dc->hwss.blank_stream(pipe_ctx); - if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (pipe_ctx->link_config.dp_tunnel_settings.should_use_dp_bw_allocation) deallocate_usb4_bandwidth(pipe_ctx->stream); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) @@ -2442,7 +2442,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (link->connector_signal == SIGNAL_TYPE_EDP && dc->debug.psp_disabled_wa) { /* reset internal save state to default since eDP is off */ enum dp_panel_mode panel_mode = dp_get_panel_mode(pipe_ctx->stream->link); - /* since current psp not loaded, we need to reset it to default*/ + /* since current psp not loaded, we need to reset it to default */ link->panel_mode = panel_mode; } } @@ -2620,7 +2620,7 @@ void link_set_dpms_on( if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_set_hblank_reduction_on_rx(pipe_ctx); - if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + if (pipe_ctx->link_config.dp_tunnel_settings.should_use_dp_bw_allocation) allocate_usb4_bandwidth(pipe_ctx->stream); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index f6b6b19e7481..1a04f4b74585 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -156,6 +156,7 @@ static void construct_link_service_dp_capability(struct link_service *link_srv) link_srv->dp_get_encoding_format = link_dp_get_encoding_format; link_srv->dp_should_enable_fec = dp_should_enable_fec; link_srv->dp_decide_link_settings = link_decide_link_settings; + link_srv->dp_decide_tunnel_settings = link_decide_dp_tunnel_settings; link_srv->mst_decide_link_encoding_format = mst_decide_link_encoding_format; link_srv->edp_decide_link_settings = edp_decide_link_settings; @@ -464,6 +465,7 @@ static bool construct_phy(struct dc_link *link, link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID; + link->irq_source_read_request = DC_IRQ_SOURCE_INVALID; link->link_status.dpcd_caps = &link->dpcd_caps; link->dc = init_params->dc; @@ -514,6 +516,9 @@ static bool construct_phy(struct dc_link *link, case CONNECTOR_ID_HDMI_TYPE_A: link->connector_signal = SIGNAL_TYPE_HDMI_TYPE_A; + if (link->hpd_gpio) + link->irq_source_read_request = + dal_irq_get_read_request(link->hpd_gpio); break; case CONNECTOR_ID_SINGLE_LINK_DVID: case CONNECTOR_ID_SINGLE_LINK_DVII: @@ -653,7 +658,7 @@ static bool construct_phy(struct dc_link *link, } /* Look for device tag that matches connector signal, - * CRT for rgb, LCD for other supported signal tyes + * CRT for rgb, LCD for other supported signal types */ if (!bp_funcs->is_device_id_supported(dc_ctx->dc_bios, link->device_tag.dev_id)) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 21ee0d96c9d4..8f79881ad9f1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -158,6 +158,14 @@ uint8_t dp_parse_lttpr_repeater_count(uint8_t lttpr_repeater_count) return 0; // invalid value } +uint32_t dp_get_closest_lttpr_offset(uint8_t lttpr_count) +{ + /* Calculate offset for LTTPR closest to DPTX which is highest in the chain + * Offset is 0 for single LTTPR cases as base LTTPR DPCD addresses target LTTPR 1 + */ + return DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE * (lttpr_count - 1); +} + uint32_t link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw) { switch (bw) { @@ -2013,11 +2021,9 @@ static bool retrieve_link_cap(struct dc_link *link) sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); /* Read DP tunneling information. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - status = dpcd_get_tunneling_device_data(link); - if (status != DC_OK) - dm_error("%s: Read DP tunneling device data failed.\n", __func__); - } + status = dpcd_get_tunneling_device_data(link); + if (status != DC_OK) + dm_error("%s: Read DP tunneling device data failed.\n", __func__); retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h index 0ce0af3ddbeb..940b147cc5d4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h @@ -48,6 +48,9 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link); /* Convert PHY repeater count read from DPCD uint8_t. */ uint8_t dp_parse_lttpr_repeater_count(uint8_t lttpr_repeater_count); +/* Calculate embedded LTTPR address offset for vendor-specific behaviour */ +uint32_t dp_get_closest_lttpr_offset(uint8_t lttpr_count); + bool dp_is_sink_present(struct dc_link *link); bool dp_is_lttpr_present(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0d123e647652..22bfdced64ab 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -62,6 +62,36 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) if (status != DC_OK) goto err; + link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw = + dpcd_dp_tun_data[DP_TUNNELING_CAPABILITIES_SUPPORT - DP_TUNNELING_CAPABILITIES_SUPPORT]; + + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling == false) + goto err; + + link->dpcd_caps.usb4_dp_tun_info.dpia_info.raw = + dpcd_dp_tun_data[DP_IN_ADAPTER_INFO - DP_TUNNELING_CAPABILITIES_SUPPORT]; + link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id = + dpcd_dp_tun_data[DP_USB4_DRIVER_ID - DP_TUNNELING_CAPABILITIES_SUPPORT]; + + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc) { + status = core_link_read_dpcd(link, USB4_DRIVER_BW_CAPABILITY, + dpcd_dp_tun_data, 1); + + if (status != DC_OK) + goto err; + + link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.raw = dpcd_dp_tun_data[0]; + } + + DC_LOG_DEBUG("%s: Link[%d] DP tunneling support (RouterId=%d AdapterId=%d) " + "DPIA_BW_Alloc_support=%d " + "CM_BW_Alloc_support=%d ", + __func__, link->link_index, + link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id, + link->dpcd_caps.usb4_dp_tun_info.dpia_info.bits.dpia_num, + link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc, + link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support); + status = core_link_read_dpcd( link, DP_USB4_ROUTER_TOPOLOGY_ID, @@ -71,13 +101,6 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) if (status != DC_OK) goto err; - link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.raw = - dpcd_dp_tun_data[DP_TUNNELING_CAPABILITIES_SUPPORT - DP_TUNNELING_CAPABILITIES_SUPPORT]; - link->dpcd_caps.usb4_dp_tun_info.dpia_info.raw = - dpcd_dp_tun_data[DP_IN_ADAPTER_INFO - DP_TUNNELING_CAPABILITIES_SUPPORT]; - link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id = - dpcd_dp_tun_data[DP_USB4_DRIVER_ID - DP_TUNNELING_CAPABILITIES_SUPPORT]; - for (i = 0; i < DPCD_USB4_TOPOLOGY_ID_LEN; i++) link->dpcd_caps.usb4_dp_tun_info.usb4_topology_id[i] = dpcd_topology_data[i]; @@ -92,6 +115,7 @@ bool dpia_query_hpd_status(struct dc_link *link) /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; @@ -119,3 +143,20 @@ bool dpia_query_hpd_status(struct dc_link *link) return link->hpd_status; } +void link_decide_dp_tunnel_settings(struct dc_stream_state *stream, + struct dc_tunnel_settings *dp_tunnel_setting) +{ + struct dc_link *link = stream->link; + + memset(dp_tunnel_setting, 0, sizeof(*dp_tunnel_setting)); + + if ((stream->signal == SIGNAL_TYPE_DISPLAY_PORT) || (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) { + dp_tunnel_setting->should_enable_dp_tunneling = + link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling; + + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc + && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) + dp_tunnel_setting->should_use_dp_bw_allocation = true; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h index 363f45a1a964..a61edfc9ca7a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h @@ -38,4 +38,10 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link); * Returns true if HPD high. */ bool dpia_query_hpd_status(struct dc_link *link); + +/* Decide the DP tunneling settings based on the DPCD capabilities + */ +void link_decide_dp_tunnel_settings(struct dc_stream_state *stream, + struct dc_tunnel_settings *dp_tunnel_setting); + #endif /* __DC_LINK_DPIA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index a254ead2f7e8..3af7564a84f1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -46,9 +46,10 @@ */ static bool link_dp_is_bw_alloc_available(struct dc_link *link) { - return (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA - && link->hpd_status - && link->dpia_bw_alloc_config.bw_alloc_enabled); + return (link && link->hpd_status + && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling + && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc + && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support); } static void reset_bw_alloc_struct(struct dc_link *link) @@ -141,7 +142,7 @@ static int get_non_reduced_max_lane_count(struct dc_link *link) * granuality, Driver_ID, CM_Group, & populate the BW allocation structs * for host router and dpia */ -static void init_usb4_bw_struct(struct dc_link *link) +static void retrieve_usb4_dp_bw_allocation_info(struct dc_link *link) { reset_bw_alloc_struct(link); @@ -282,49 +283,26 @@ static void link_dpia_send_bw_alloc_request(struct dc_link *link, int req_bw) // ------------------------------------------------------------------ // PUBLIC FUNCTIONS // ------------------------------------------------------------------ -bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) +bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) { bool ret = false; - uint8_t response = 0, - bw_support_dpia = 0, - bw_support_cm = 0; + uint8_t val; - if (!(link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->hpd_status)) - goto out; + if (link->hpd_status) { + val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; - if (core_link_read_dpcd( - link, - DP_TUNNELING_CAPABILITIES, - &response, - sizeof(uint8_t)) == DC_OK) - bw_support_dpia = (response >> 7) & 1; + if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { + DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index); - if (core_link_read_dpcd( - link, - USB4_DRIVER_BW_CAPABILITY, - &response, - sizeof(uint8_t)) == DC_OK) - bw_support_cm = (response >> 7) & 1; + retrieve_usb4_dp_bw_allocation_info(link); - /* Send request acknowledgment to Turn ON DPTX support */ - if (bw_support_cm && bw_support_dpia) { + if (link->dpia_bw_alloc_config.nrd_max_link_rate && link->dpia_bw_alloc_config.nrd_max_lane_count) { + link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; + link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; + } - response = 0x80; - if (core_link_write_dpcd( - link, - DPTX_BW_ALLOCATION_MODE_CONTROL, - &response, - sizeof(uint8_t)) != DC_OK) { - DC_LOG_DEBUG("%s: FAILURE Enabling DPtx BW Allocation Mode Support for link(%d)\n", - __func__, link->link_index); - } else { - // SUCCESS Enabled DPtx BW Allocation Mode Support - DC_LOG_DEBUG("%s: SUCCESS Enabling DPtx BW Allocation Mode Support for link(%d)\n", - __func__, link->link_index); - - ret = true; - init_usb4_bw_struct(link); link->dpia_bw_alloc_config.bw_alloc_enabled = true; + ret = true; /* * During DP tunnel creation, CM preallocates BW and reduces estimated BW of other @@ -332,11 +310,12 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) * to make the CM to release preallocation and update estimated BW correctly for * all DPIAs per host router */ + // TODO: Zero allocation can be removed once the MSFT CM fix has been released link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0); - } + } else + DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index); } -out: return ret; } @@ -378,7 +357,8 @@ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status) */ void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw) { - if (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->dpia_bw_alloc_config.bw_alloc_enabled) { + if (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling + && link->dpia_bw_alloc_config.bw_alloc_enabled) { //1. Hot Plug if (link->hpd_status && peak_bw > 0) { // If DP over USB4 then we need to check BW allocation @@ -401,7 +381,7 @@ void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r if (link_dp_is_bw_alloc_available(link)) link_dpia_send_bw_alloc_request(link, req_bw); else - DC_LOG_DEBUG("%s: Not able to send the BW Allocation request", __func__); + DC_LOG_DEBUG("%s: BW Allocation mode not available", __func__); } bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 6df9b946b00f..801965b5f9a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -43,13 +43,13 @@ enum bw_type { }; /* - * Enable BW Allocation Mode Support from the DP-Tx side + * Enable USB4 DP BW allocation mode * * @link: pointer to the dc_link struct instance * * return: SUCCESS or FAILURE */ -bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link); +bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link); /* * Allocates only what the stream needs for bw, so if: diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c index 5be00e4ce10b..693477413347 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c @@ -229,6 +229,10 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw; + /* Increment desync error counter if a desync error is detected */ + if (replay_configuration.bits.DESYNC_ERROR_STATUS) + link->replay_settings.replay_desync_error_fail_count++; + if (link->replay_settings.config.force_disable_desync_error_check) return; @@ -240,9 +244,6 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link) &replay_configuration.raw, sizeof(replay_configuration.raw)); - /* Update desync error counter */ - link->replay_settings.replay_desync_error_fail_count++; - /* Acknowledge and clear error bits */ dm_helpers_dp_write_dpcd( link->ctx, @@ -351,7 +352,7 @@ enum dc_status dp_read_hpd_rx_irq_data( irq_data->raw, DP_SINK_STATUS - DP_SINK_COUNT + 1); - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling) { retval = core_link_read_dpcd( link, DP_LINK_SERVICE_IRQ_VECTOR_ESI0, &irq_data->bytes.link_service_irq_esi0.raw, 1); @@ -520,7 +521,7 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link, dp_trace_link_loss_increment(link); } - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling) { if (hpd_irq_dpcd_data.bytes.link_service_irq_esi0.bits.DP_LINK_TUNNELING_IRQ) dp_handle_tunneling_irq(link); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index ef358afdfb65..2dc1a660e504 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -785,7 +785,6 @@ void override_training_settings( lt_settings->lttpr_mode = LTTPR_MODE_NON_LTTPR; dp_get_lttpr_mode_override(link, <_settings->lttpr_mode); - } enum dc_dp_training_pattern decide_cr_training_pattern( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 5a5d48fadbf2..66d0fb1b9b9d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -142,6 +142,14 @@ void decide_8b_10b_training_settings( lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link); lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode); dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); + + /* Some embedded LTTPRs rely on receiving TPS2 before LT to interop reliably with sensitive VGA dongles + * This allows these LTTPRs to minimize freq/phase and skew variation during lock and deskew sequences + */ + if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == + AMD_EXT_DISPLAY_PATH_CAPS__DP_EARLY_8B10B_TPS2) { + lt_settings->lttpr_early_tps2 = true; + } } enum lttpr_mode dp_decide_8b_10b_lttpr_mode(struct dc_link *link) @@ -173,6 +181,42 @@ enum lttpr_mode dp_decide_8b_10b_lttpr_mode(struct dc_link *link) return LTTPR_MODE_NON_LTTPR; } +static void set_link_settings_and_perform_early_tps2_retimer_pre_lt_sequence(struct dc_link *link, + const struct link_resource *link_res, + struct link_training_settings *lt_settings, + uint32_t lttpr_count) +{ + /* Vendor-specific LTTPR early TPS2 sequence: + * 1. Output TPS2 + * 2. Wait 400us + * 3. Set link settings as usual + * 4. Write TPS1 to DP_TRAINING_PATTERN_SET_PHY_REPEATERx targeting LTTPR closest to host + * 5. Wait 1ms + * 6. Begin link training as usual + * */ + + uint32_t closest_lttpr_address_offset = dp_get_closest_lttpr_offset(lttpr_count); + + union dpcd_training_pattern dpcd_pattern = {0}; + + dpcd_pattern.v1_4.TRAINING_PATTERN_SET = 1; + dpcd_pattern.v1_4.SCRAMBLING_DISABLE = 1; + + DC_LOG_HW_LINK_TRAINING("%s\n GPU sends TPS2. Wait 400us.\n", __func__); + + dp_set_hw_training_pattern(link, link_res, DP_TRAINING_PATTERN_SEQUENCE_2, DPRX); + + dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX); + + udelay(400); + + dpcd_set_link_settings(link, lt_settings); + + core_link_write_dpcd(link, DP_TRAINING_PATTERN_SET_PHY_REPEATER1 + closest_lttpr_address_offset, &dpcd_pattern.raw, 1); + + udelay(1000); + } + enum link_training_result perform_8b_10b_clock_recovery_sequence( struct dc_link *link, const struct link_resource *link_res, @@ -383,7 +427,7 @@ enum link_training_result dp_perform_8b_10b_link_training( { enum link_training_result status = LINK_TRAINING_SUCCESS; - uint8_t repeater_cnt; + uint8_t repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); uint8_t repeater_id; uint8_t lane = 0; @@ -391,14 +435,16 @@ enum link_training_result dp_perform_8b_10b_link_training( start_clock_recovery_pattern_early(link, link_res, lt_settings, DPRX); /* 1. set link rate, lane count and spread. */ - dpcd_set_link_settings(link, lt_settings); + if (lt_settings->lttpr_early_tps2) + set_link_settings_and_perform_early_tps2_retimer_pre_lt_sequence(link, link_res, lt_settings, repeater_cnt); + else + dpcd_set_link_settings(link, lt_settings); if (lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) { /* 2. perform link training (set link training done * to false is done as well) */ - repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); for (repeater_id = repeater_cnt; (repeater_id > 0 && status == LINK_TRAINING_SUCCESS); repeater_id--) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 1e4adbc764ea..da74c2b5854f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -524,7 +524,7 @@ bool edp_set_backlight_level(const struct dc_link *link, struct dc *dc = link->ctx->dc; uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; uint32_t frame_ramp = backlight_level_params->frame_ramp; - DC_LOGGER_INIT(link->ctx->logger); + DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); @@ -1022,6 +1022,9 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream &alpm_config.raw, sizeof(alpm_config.raw)); } + + link->replay_settings.config.replay_video_conferencing_optimization_enabled = false; + return true; } @@ -1130,11 +1133,11 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link) struct abm *abm = NULL; for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx.stream; + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + struct dc_stream_state *stream = pipe_ctx->stream; if (stream && stream->link == link) { - abm = pipe_ctx.stream_res.abm; + abm = pipe_ctx->stream_res.abm; break; } } diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c index a0e9e9f0441a..b4cea2b8cb2a 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c @@ -370,124 +370,126 @@ void mpc32_program_shaper_luta_settings( MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_2_3[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_4_5[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_6_7[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_8_9[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_10_11[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_12_13[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_14_15[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_16_17[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_18_19[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_20_21[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_22_23[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_24_25[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_28_29[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_30_31[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMA_REGION_32_33[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + } } @@ -520,125 +522,127 @@ void mpc32_program_shaper_lutb_settings( MPCC_MCM_SHAPER_RAMA_EXP_REGION_END_BASE_B, params->corner_points[1].red.custom_float_y); curve = params->arr_curve_points; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + if (curve) { + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_0_1[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_2_3[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_4_5[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_6_7[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_8_9[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_10_11[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_12_13[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_14_15[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_16_17[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_18_19[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_20_21[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_22_23[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_24_25[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_26_27[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_28_29[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_30_31[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); - curve += 2; - REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, - MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + curve += 2; + REG_SET_4(MPCC_MCM_SHAPER_RAMB_REGION_32_33[mpcc_id], 0, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_LUT_OFFSET, curve[0].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION0_NUM_SEGMENTS, curve[0].segments_num, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_LUT_OFFSET, curve[1].offset, + MPCC_MCM_SHAPER_RAMA_EXP_REGION1_NUM_SEGMENTS, curve[1].segments_num); + } } diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index ad67197557ca..98cf0cbd59ba 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -47,16 +47,6 @@ void mpc401_update_3dlut_fast_load_select(struct mpc *mpc, int mpcc_id, int hubp REG_SET(MPCC_MCM_3DLUT_FAST_LOAD_SELECT[mpcc_id], 0, MPCC_MCM_3DLUT_FL_SEL, hubp_idx); } -void mpc401_get_3dlut_fast_load_status(struct mpc *mpc, int mpcc_id, uint32_t *done, uint32_t *soft_underflow, uint32_t *hard_underflow) -{ - struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); - - REG_GET_3(MPCC_MCM_3DLUT_FAST_LOAD_STATUS[mpcc_id], - MPCC_MCM_3DLUT_FL_DONE, done, - MPCC_MCM_3DLUT_FL_SOFT_UNDERFLOW, soft_underflow, - MPCC_MCM_3DLUT_FL_HARD_UNDERFLOW, hard_underflow); -} - void mpc401_set_movable_cm_location(struct mpc *mpc, enum mpcc_movable_cm_location location, int mpcc_id) { struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); @@ -618,7 +608,6 @@ static const struct mpc_funcs dcn401_mpc_funcs = { .set_bg_color = mpc1_set_bg_color, .set_movable_cm_location = mpc401_set_movable_cm_location, .update_3dlut_fast_load_select = mpc401_update_3dlut_fast_load_select, - .get_3dlut_fast_load_status = mpc401_get_3dlut_fast_load_status, .populate_lut = mpc401_populate_lut, .program_lut_read_write_control = mpc401_program_lut_read_write_control, .program_lut_mode = mpc401_program_lut_mode, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h index ce6fbcf14d7a..8e35ebc603a9 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h @@ -241,23 +241,9 @@ void mpc401_update_3dlut_fast_load_select( int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - void mpc401_update_3dlut_fast_load_select( struct mpc *mpc, int mpcc_id, int hubp_idx); -void mpc401_get_3dlut_fast_load_status( - struct mpc *mpc, - int mpcc_id, - uint32_t *done, - uint32_t *soft_underflow, - uint32_t *hard_underflow); - #endif diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index b86fe2b094f8..4cfc6c0fa147 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -507,6 +507,7 @@ void dcn35_timing_generator_init(struct optc *optc1) optc1->min_v_blank_interlace = 5; optc1->min_h_sync_width = 4; optc1->min_v_sync_width = 1; + optc1->max_frame_count = 0xFFFFFF; dcn35_timing_generator_set_fgcg( optc1, CTX->dc->debug.enable_fine_grain_clock_gating.bits.optc); diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index b8cddef6b3d2..5b42da8b79c2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -27,6 +27,24 @@ # DCE ############################################################################### +ifdef CONFIG_DRM_AMD_DC_SI +RESOURCE_DCE60 = dce60_resource.o + +AMD_DAL_RESOURCE_DCE60 = $(addprefix $(AMDDALPATH)/dc/resource/dce60/,$(RESOURCE_DCE60)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE60) +endif + +############################################################################### + +RESOURCE_DCE80 = dce80_resource.o + +AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) + +AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) + +############################################################################### + RESOURCE_DCE100 = dce100_resource.o AMD_DAL_RESOURCE_DCE100 = $(addprefix $(AMDDALPATH)/dc/resource/dce100/,$(RESOURCE_DCE100)) @@ -57,14 +75,6 @@ AMD_DAL_RESOURCE_DCE120 = $(addprefix $(AMDDALPATH)/dc/resource/dce120/,$(RESOUR AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE120) -############################################################################### - -RESOURCE_DCE80 = dce80_resource.o - -AMD_DAL_RESOURCE_DCE80 = $(addprefix $(AMDDALPATH)/dc/resource/dce80/,$(RESOURCE_DCE80)) - -AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCE80) - ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### # DCN diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index e698543ec937..84f73fdb0f95 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -836,7 +836,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static bool dce100_validate_bandwidth( +static enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -858,7 +858,7 @@ static bool dce100_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce100_validate_surface_sets( @@ -1069,7 +1069,7 @@ static bool dce100_resource_construct( pool->base.timing_generator_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 40; - dc->caps.i2c_speed_in_khz = 40; + dc->caps.i2c_speed_in_khz_hdcp = 40; dc->caps.max_cursor_size = 128; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dual_link_dvi = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index 035c6cfdaee5..f3d5baac11bf 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -960,7 +960,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static bool dce110_validate_bandwidth( +static enum dc_status dce110_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -1031,7 +1031,7 @@ static bool dce110_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz, context->bw_ctx.bw.dce.blackout_recovery_time_us); } - return result; + return result ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static enum dc_status dce110_validate_plane(const struct dc_plane_state *plane_state, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 480a50967385..4225cae68c10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -883,7 +883,7 @@ static enum dc_status build_mapped_resource( return DC_OK; } -bool dce112_validate_bandwidth( +enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -952,7 +952,7 @@ bool dce112_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz, context->bw_ctx.bw.dce.blackout_recovery_time_us); } - return result; + return result ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } enum dc_status resource_map_phy_clock_resources( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h index 1f57ebc6f9b4..6221d749246d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h @@ -42,7 +42,7 @@ enum dc_status dce112_validate_with_context( struct dc_state *context, struct dc_state *old_context); -bool dce112_validate_bandwidth( +enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c similarity index 99% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c rename to drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 889f314cac65..d9ffdded5ce1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -48,7 +48,7 @@ #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" -#include "dce60/dce60_hw_sequencer.h" +#include "dce60/dce60_hwseq.h" #include "dce100/dce100_resource.h" #include "dce/dce_panel_cntl.h" @@ -863,7 +863,7 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce60_validate_bandwidth( +static enum dc_status dce60_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -885,7 +885,7 @@ static bool dce60_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce60_validate_surface_sets( diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.h rename to drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 3d5113f010bb..bd5811f97531 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -869,7 +869,7 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static bool dce80_validate_bandwidth( +static enum dc_status dce80_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -891,7 +891,7 @@ static bool dce80_validate_bandwidth( context->bw_ctx.bw.dce.yclk_khz = 0; } - return true; + return DC_OK; } static bool dce80_validate_surface_sets( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index e92f14d50adb..be4ade0853e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -23,6 +23,7 @@ * */ +#include "core_status.h" #include "dm_services.h" #include "dc.h" @@ -1125,7 +1126,7 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } -static bool dcn10_validate_bandwidth( +static enum dc_status dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -1136,7 +1137,7 @@ static bool dcn10_validate_bandwidth( voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); DC_FP_END(); - return voltage_supported; + return voltage_supported ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps) @@ -1245,6 +1246,10 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( if (link->ep_type == DISPLAY_ENDPOINT_PHY && pool->stream_enc[i]->id == link->link_enc->preferred_engine) return pool->stream_enc[i]; + + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && pool->stream_enc[i]->id == + link->dpia_preferred_eng_id) + return pool->stream_enc[i]; } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index e4eca3e32c1b..3405be07f5e3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2124,7 +2124,7 @@ validate_out: return out; } -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, +enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -2132,14 +2132,14 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); if (!pipes) - return false; + return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); DC_FP_END(); kfree(pipes); - return voltage_supported; + return voltage_supported ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } struct pipe_ctx *dcn20_acquire_free_pipe_for_layer( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h index 4cee3fa11a7f..c0e062c7407d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -119,7 +119,7 @@ void dcn20_set_mcif_arb_params( struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); void dcn20_merge_pipes_for_validate( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 4bd5c2278596..9ab01b65b177 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -923,7 +923,7 @@ validate_out: * with DC_FP_START()/DC_FP_END(). Use the same approach as for * dcn20_validate_bandwidth in dcn20_resource.c. */ -static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, +static enum dc_status dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { bool voltage_supported; @@ -931,14 +931,14 @@ static bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, pipes = kcalloc(dc->res_pool->pipe_count, sizeof(display_e2e_pipe_params_st), GFP_KERNEL); if (!pipes) - return false; + return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate, pipes); DC_FP_END(); kfree(pipes); - return voltage_supported; + return voltage_supported ? DC_OK : DC_NOT_SUPPORTED; } static void dcn21_destroy_resource_pool(struct resource_pool **pool) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index f01ced015072..f631ae34e320 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -1891,8 +1891,6 @@ static int get_refresh_rate(struct dc_state *context) /* check if refresh rate at least 120hz */ timing = &context->streams[0]->timing; - if (timing == NULL) - return 0; h_v_total = timing->h_total * timing->v_total; if (h_v_total == 0) @@ -2037,7 +2035,7 @@ void dcn30_calculate_wm_and_dlg( DC_FP_END(); } -bool dcn30_validate_bandwidth(struct dc *dc, +enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -2094,7 +2092,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h index 8e6b8b7368fd..689d9bdace81 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h @@ -56,7 +56,7 @@ unsigned int dcn30_calc_max_scaled_time( enum mmhubbub_wbif_mode mode, unsigned int urgent_watermark); -bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, +enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); bool dcn30_internal_validate_bw( struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index dddddbfef85f..7e0af5297dc4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1758,7 +1758,7 @@ dcn31_set_mcif_arb_params(struct dc *dc, DC_FP_END(); } -bool dcn31_validate_bandwidth(struct dc *dc, +enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1813,7 +1813,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static void dcn31_get_panel_config_defaults(struct dc_panel_config *panel_config) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index 551ad912f7be..dd82815d7efe 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -37,7 +37,7 @@ struct dcn31_resource_pool { struct resource_pool base; }; -bool dcn31_validate_bandwidth(struct dc *dc, +enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); void dcn31_calculate_wm_and_dlg( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 26becc4cb804..d96bc6cb73ad 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1694,7 +1694,7 @@ static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_confi *panel_config = panel_config_defaults; } -bool dcn314_validate_bandwidth(struct dc *dc, +enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1750,7 +1750,7 @@ validate_out: BW_VAL_TRACE_FINISH(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static struct resource_funcs dcn314_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h index 49ffe71018df..f8ba531d6342 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h @@ -39,7 +39,7 @@ struct dcn314_resource_pool { struct resource_pool base; }; -bool dcn314_validate_bandwidth(struct dc *dc, +enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 944650cb13de..bb0dae0be5b8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -24,6 +24,7 @@ * */ +#include "dc_types.h" #include "dm_services.h" #include "dc.h" @@ -1806,19 +1807,56 @@ validate_out: return out; } -bool dcn32_validate_bandwidth(struct dc *dc, +enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { - bool out = false; + unsigned int i; + enum dc_status status; + const struct dc_stream_state *stream; + + /* reset cursor limitations on subvp */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context)) { + dc_state_set_stream_cursor_subvp_limit(stream, context, false); + } + } if (dc->debug.using_dml2) - out = dml2_validate(dc, context, + status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; else - out = dml1_validate(dc, context, fast_validate); - return out; + status = dml1_validate(dc, context, fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + + if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + /* check new stream configuration still supports cursor if subvp used */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_get_stream_subvp_type(context, stream) != SUBVP_PHANTOM && + stream->cursor_position.enable && + !dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes)) { + /* hw cursor cannot be supported with subvp active, so disable subvp for now */ + dc_state_set_stream_cursor_subvp_limit(stream, context, true); + status = DC_FAIL_HW_CURSOR_SUPPORT; + } + }; + } + + if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + /* attempt to validate again with subvp disabled due to cursor */ + if (dc->debug.using_dml2) + status = dml2_validate(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + else + status = dml1_validate(dc, context, fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + } + + return status; } int dcn32_populate_dml_pipes_from_context( @@ -2042,6 +2080,18 @@ static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw DC_FP_END(); } +unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream) +{ + bool limit_cur_to_buf; + + limit_cur_to_buf = dc_state_get_stream_subvp_cursor_limit(stream, state) && + !stream->hw_cursor_req; + + return limit_cur_to_buf ? dc->caps.max_buffered_cursor_size : dc->caps.max_cursor_size; +} + static struct resource_funcs dcn32_res_pool_funcs = { .destroy = dcn32_destroy_resource_pool, .link_enc_create = dcn32_link_encoder_create, @@ -2067,7 +2117,8 @@ static struct resource_funcs dcn32_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -2151,6 +2202,7 @@ static bool dcn32_resource_construct( dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ /* TODO: Bring max_cursor_size back to 256 after subvp cursor corruption is fixed*/ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4) dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 4; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 1aa4ced29291..d60ed77eda80 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -98,7 +98,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, unsigned int pipe_cnt, unsigned int index); -bool dcn32_validate_bandwidth(struct dc *dc, +enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); @@ -188,6 +188,10 @@ void dcn32_override_min_req_dcfclk(struct dc *dc, struct dc_state *context); unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned int total_size_in_mall_bytes); +unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, + struct dc_state *state, + const struct dc_stream_state *stream); + /* definitions for run time init of reg offsets */ /* CLK SRC */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 38d76434683e..7db1f7a5613f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1624,7 +1624,8 @@ static struct resource_funcs dcn321_res_pool_funcs = { .add_phantom_pipes = dcn32_add_phantom_pipes, .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1709,6 +1710,7 @@ static bool dcn321_resource_construct( dc->caps.i2c_speed_in_khz_hdcp = 100; /*1.4 w/a applied by default*/ /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; // sqrt(16 * 1024 / 4) dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 4; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ffd2b816cd02..72c6cf047db0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1732,7 +1732,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn35_validate_bandwidth(struct dc *dc, +static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1743,13 +1743,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state) @@ -1903,7 +1903,7 @@ static bool dcn35_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; /* Sequential ONO is based on ASIC. */ - if (dc->ctx->asic_id.hw_internal_rev > 0x10) + if (dc->ctx->asic_id.hw_internal_rev >= 0x40) dc->caps.sequential_ono = true; /* Use pipe context based otg sync logic */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 98f5bc1b929e..989a270f7dea 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1712,7 +1712,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn351_validate_bandwidth(struct dc *dc, +static enum dc_status dcn351_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1723,13 +1723,13 @@ static bool dcn351_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } static struct resource_funcs dcn351_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index b6468573dc33..48e1f234185f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1713,7 +1713,7 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config } -static bool dcn35_validate_bandwidth(struct dc *dc, +static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -1724,13 +1724,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, fast_validate); if (fast_validate) - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); dcn35_decide_zstate_support(dc, context); DC_FP_END(); - return out; + return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } @@ -1876,7 +1876,7 @@ static bool dcn36_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; /* Sequential ONO is based on ASIC. */ - if (dc->ctx->asic_id.hw_internal_rev > 0x10) + if (dc->ctx->asic_id.hw_internal_rev >= 0x40) dc->caps.sequential_ono = true; /* Use pipe context based otg sync logic */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 7436dfbdf927..e0e32975ca34 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -1642,16 +1642,52 @@ enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_sta return DC_OK; } -bool dcn401_validate_bandwidth(struct dc *dc, +enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { - bool out = false; + unsigned int i; + enum dc_status status = DC_OK; + const struct dc_stream_state *stream; + + /* reset cursor limitations on subvp */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_can_clear_stream_cursor_subvp_limit(stream, context)) { + dc_state_set_stream_cursor_subvp_limit(stream, context, false); + } + } + if (dc->debug.using_dml2) - out = dml2_validate(dc, context, + status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); - return out; + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + + if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + /* check new stream configuration still supports cursor if subvp used */ + for (i = 0; i < context->stream_count; i++) { + stream = context->streams[i]; + + if (dc_state_get_stream_subvp_type(context, stream) != SUBVP_PHANTOM && + stream->cursor_position.enable && + !dc_stream_check_cursor_attributes(stream, context, &stream->cursor_attributes)) { + /* hw cursor cannot be supported with subvp active, so disable subvp for now */ + dc_state_set_stream_cursor_subvp_limit(stream, context, true); + status = DC_FAIL_HW_CURSOR_SUPPORT; + } + }; + } + + if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + /* attempt to validate again with subvp disabled due to cursor */ + if (dc->debug.using_dml2) + status = dml2_validate(dc, context, + context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, + fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + } + + return status; } void dcn401_prepare_mcache_programming(struct dc *dc, @@ -1770,7 +1806,8 @@ static struct resource_funcs dcn401_res_pool_funcs = { .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, .get_power_profile = dcn401_get_power_profile, - .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe, + .get_max_hw_cursor_size = dcn32_get_max_hw_cursor_size }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1846,8 +1883,9 @@ static bool dcn401_resource_construct( dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 95; dc->caps.i2c_speed_in_khz_hdcp = 95; /*1.4 w/a applied by default*/ - /* TODO: Bring max cursor size back to 256 after subvp cursor corruption is fixed*/ + /* used to set cursor pitch, so must be aligned to power of 2 (HW actually supported 78x78) */ dc->caps.max_cursor_size = 64; + dc->caps.max_buffered_cursor_size = 64; dc->caps.cursor_not_scaled = true; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 4c259745d519..dc52a30991af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -22,7 +22,7 @@ struct resource_pool *dcn401_create_resource_pool( enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state); -bool dcn401_validate_bandwidth(struct dc *dc, +enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c index 28348734d900..e0008c5f08ad 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c @@ -776,7 +776,7 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, * Do not bypass UV at 1:1 for cositing to be applied */ if (!enable_isharp) { - if (data->ratios.horz.value == one && data->ratios.vert.value == one) + if (data->ratios.horz.value == one && data->ratios.vert.value == one && !spl_in->basic_out.always_scale) return SCL_MODE_SCALING_420_LUMA_BYPASS; } @@ -884,7 +884,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, /* Calculate number of tap with adaptive scaling off */ static void spl_get_taps_non_adaptive_scaler( - struct spl_scratch *spl_scratch, const struct spl_taps *in_taps) + struct spl_scratch *spl_scratch, const struct spl_taps *in_taps, bool always_scale) { bool check_max_downscale = false; @@ -944,15 +944,15 @@ static void spl_get_taps_non_adaptive_scaler( spl_fixpt_from_fraction(6, 1)); SPL_ASSERT(check_max_downscale); - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) - spl_scratch->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)) - spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) - spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) - spl_scratch->scl_data.taps.v_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz) && !always_scale) + spl_scratch->scl_data.taps.h_taps = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert) && !always_scale) + spl_scratch->scl_data.taps.v_taps = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !always_scale) + spl_scratch->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !always_scale) + spl_scratch->scl_data.taps.v_taps_c = 1; } /* Calculate optimal number of taps */ @@ -965,13 +965,15 @@ static bool spl_get_optimal_number_of_taps( unsigned int max_taps_y, max_taps_c; unsigned int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - bool skip_easf = false; + bool skip_easf = false; + bool always_scale = spl_in->basic_out.always_scale; bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format); + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && spl_scratch->scl_data.viewport.width > max_downscale_src_width) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -980,7 +982,7 @@ static bool spl_get_optimal_number_of_taps( /* Disable adaptive scaler and sharpener when integer scaling is enabled */ if (spl_in->scaling_quality.integer_scaling) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -996,7 +998,7 @@ static bool spl_get_optimal_number_of_taps( * taps = 4 for upscaling */ if (skip_easf) - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); else { if (spl_is_video_format(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; @@ -1297,7 +1299,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 0; + dscl_prog_data->easf_v_sharp_factor = 1; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode /* 2-bit, BF3 chroma mode correction calculation mode */ @@ -1461,7 +1463,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 0; + dscl_prog_data->easf_h_sharp_factor = 1; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = @@ -1898,3 +1900,4 @@ bool SPL_NAMESPACE(spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out spl_set_taps_data(dscl_prog_data, data); return res; } + diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h index 1c3949b24611..36a284305a70 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h @@ -480,6 +480,10 @@ enum sharpness_setting { SHARPNESS_ZERO, SHARPNESS_CUSTOM }; +enum sharpness_range_source { + SHARPNESS_RANGE_DCN = 0, + SHARPNESS_RANGE_DCN_OVERRIDE +}; struct spl_sharpness_range { int sdr_rgb_min; int sdr_rgb_max; diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c index 52d97918a3bd..ebf0287417e0 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c @@ -29,8 +29,6 @@ static inline unsigned long long spl_complete_integer_division_u64( { unsigned long long result; - SPL_ASSERT(divisor); - result = spl_div64_u64_rem(dividend, divisor, remainder); return result; @@ -196,8 +194,6 @@ struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) * Good idea to use Newton's method */ - SPL_ASSERT(arg.value); - return spl_fixpt_from_fraction( spl_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 4e0efff92dca..3f3fa1b6a69e 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -51,8 +51,8 @@ * for the cache windows. * * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare - * for command submission. Commands can be queued via dmub_srv_cmd_queue() - * and executed via dmub_srv_cmd_execute(). + * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() + * and executed via dmub_srv_fb_cmd_execute(). * * If the queue is full the dmub_srv_wait_for_idle() call can be used to * wait until the queue has been cleared. @@ -142,6 +142,7 @@ enum dmub_notification_type { DMUB_NOTIFICATION_SET_CONFIG_REPLY, DMUB_NOTIFICATION_DPIA_NOTIFICATION, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + DMUB_NOTIFICATION_FUSED_IO, DMUB_NOTIFICATION_MAX }; @@ -170,6 +171,13 @@ enum dmub_srv_power_state_type { DMUB_POWER_STATE_D3 = 8 }; +/* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ +enum dmub_inbox_cmd_interface_type { + DMUB_CMD_INTERFACE_DEFAULT = 0, + DMUB_CMD_INTERFACE_FB = 1, + DMUB_CMD_INTERFACE_REG = 2, +}; + /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -349,6 +357,21 @@ struct dmub_diagnostic_data { uint8_t is_cw6_enabled : 1; }; +struct dmub_srv_inbox { + /* generic status */ + uint64_t num_submitted; + uint64_t num_reported; + union { + /* frame buffer mailbox status */ + struct dmub_rb rb; + /* register mailbox status */ + struct { + bool is_pending; + bool is_multi_pending; + }; + }; +}; + /** * struct dmub_srv_base_funcs - Driver specific base callbacks */ @@ -422,6 +445,8 @@ struct dmub_srv_hw_funcs { uint32_t (*emul_get_inbox1_rptr)(struct dmub_srv *dmub); + uint32_t (*emul_get_inbox1_wptr)(struct dmub_srv *dmub); + void (*emul_set_inbox1_wptr)(struct dmub_srv *dmub, uint32_t wptr_offset); bool (*is_supported)(struct dmub_srv *dmub); @@ -462,18 +487,21 @@ struct dmub_srv_hw_funcs { void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); + void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); + uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); }; @@ -493,6 +521,7 @@ struct dmub_srv_create_params { enum dmub_asic asic; uint32_t fw_version; bool is_virtual; + enum dmub_inbox_cmd_interface_type inbox_type; }; /** @@ -521,8 +550,9 @@ struct dmub_srv { const struct dmub_srv_dcn401_regs *regs_dcn401; struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; - struct dmub_rb inbox1_rb; + struct dmub_srv_inbox inbox1; uint32_t inbox1_last_wptr; + struct dmub_srv_inbox reg_inbox0; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -542,6 +572,7 @@ struct dmub_srv { struct dmub_fw_meta_info meta_info; struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + enum dmub_inbox_cmd_interface_type inbox_type; enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; @@ -566,11 +597,8 @@ struct dmub_notification { struct aux_reply_data aux_reply; enum dp_hpd_status hpd_status; enum set_config_status sc_status; - /** - * DPIA notification command. - */ - struct dmub_rb_cmd_dpia_notification dpia_notification; struct dmub_rb_cmd_hpd_sense_notify_data hpd_sense_notify; + struct dmub_cmd_fused_request fused_request; }; }; @@ -699,19 +727,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); /** - * dmub_srv_sync_inbox1() - sync sw state with hw state - * @dmub: the dmub service - * - * Sync sw state with hw state when resume from S0i3 - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); - -/** - * dmub_srv_cmd_queue() - queues a command to the DMUB + * dmub_srv_fb_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service * @cmd: the command to queue * @@ -723,11 +739,11 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); * DMUB_STATUS_QUEUE_FULL - no remaining room in queue * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd); /** - * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub + * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub * @dmub: the dmub service * * Begins execution of queued commands on the dmub. @@ -736,7 +752,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); /** * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed @@ -794,6 +810,23 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, uint32_t timeout_us); +/** + * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Waits until the commands queued prior to this call are complete. + * If interfaces remain busy due to additional work being submitted + * concurrently, this function will not continue to wait. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us); + /** * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle * @dmub: the dmub service @@ -892,9 +925,6 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, union dmub_fw_boot_options *option); -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd); - enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool skip); @@ -958,26 +988,6 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); */ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); -/** - * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command - * being processed by DMUB. - * @dmub: The dmub service - * @cmd: The dmub command being sent. If with_replay is true, the function will - * update cmd with replied data. - * @with_reply: true if DMUB reply needs to be copied back to cmd. false if the - * cmd doesn't need to be replied. - * @timeout_us: timeout in microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout - * interval. - */ -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us); - /** * dmub_srv_set_power_state() - Track DC power state in dmub_srv * @dmub: The dmub service @@ -990,4 +1000,71 @@ enum dmub_status dmub_srv_send_reg_inbox0_cmd( */ void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); +/** + * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub + * @dmub: the dmub service + * @cmd: the command packet to be executed + * + * Executes a single command for the dmub. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); + + +/** + * dmub_srv_cmd_get_response() - Copies return data for command into buffer + * @dmub: the dmub service + * @cmd_rsp: response buffer + * + * Copies return data for command into buffer + */ +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp); + +/** + * dmub_srv_sync_inboxes() - Sync inbox state + * @dmub: the dmub service + * + * Sync inbox state + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); + +/** + * dmub_srv_wait_for_inbox_free() - Waits for space in the DMUB inbox to free up + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * @num_free_required: number of free entries required + * + * Waits until the DMUB buffer is freed to the specified number. + * The maximum wait time is given in microseconds to prevent spinning + * forever. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub, + uint32_t timeout_us, + uint32_t num_free_required); + +/** + * dmub_srv_update_inbox_status() - Updates pending status for inbox & reg inbox0 + * @dmub: the dmub service + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_HW_FAILURE - issue with HW programming + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub); + #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 1f5f4e3e49d4..57fa05bddb45 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,6 +36,9 @@ //================================================================== /* Basic type definitions. */ +#ifdef __forceinline +#undef __forceinline +#endif #define __forceinline inline /** @@ -547,6 +550,11 @@ union replay_hw_flags { * @is_alpm_initialized: Indicates whether ALPM is initialized */ uint32_t is_alpm_initialized : 1; + + /** + * @alpm_mode: Indicates ALPM mode selected + */ + uint32_t alpm_mode : 2; } bitfields; uint32_t u32All; @@ -739,6 +747,14 @@ enum dmub_ips_disable_type { DMUB_IPS_DISABLE_IPS2_Z10 = 4, DMUB_IPS_DISABLE_DYNAMIC = 5, DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF = 6, + DMUB_IPS_DISABLE_Z8_RETENTION = 7, +}; + +enum dmub_ips_rcg_disable_type { + DMUB_IPS_RCG_ENABLE = 0, + DMUB_IPS0_RCG_DISABLE = 1, + DMUB_IPS1_RCG_DISABLE = 2, + DMUB_IPS_RCG_DISABLE = 3 }; #define DMUB_IPS1_ALLOW_MASK 0x00000001 @@ -817,11 +833,12 @@ enum dmub_shared_state_feature_id { */ union dmub_shared_state_ips_fw_signals { struct { - uint32_t ips1_commit : 1; /**< 1 if in IPS1 */ + uint32_t ips1_commit : 1; /**< 1 if in IPS1 or IPS0 RCG */ uint32_t ips2_commit : 1; /**< 1 if in IPS2 */ uint32_t in_idle : 1; /**< 1 if DMCUB is in idle */ uint32_t detection_required : 1; /**< 1 if detection is required */ - uint32_t reserved_bits : 28; /**< Reversed */ + uint32_t ips1z8_commit: 1; /**< 1 if in IPS1 Z8 Retention */ + uint32_t reserved_bits : 27; /**< Reversed */ } bits; uint32_t all; }; @@ -836,7 +853,10 @@ union dmub_shared_state_ips_driver_signals { uint32_t allow_ips2 : 1; /**< 1 is IPS1 is allowed */ uint32_t allow_z10 : 1; /**< 1 if Z10 is allowed */ uint32_t allow_idle: 1; /**< 1 if driver is allowing idle */ - uint32_t reserved_bits : 27; /**< Reversed bits */ + uint32_t allow_ips0_rcg : 1; /**< 1 is IPS0 RCG is allowed */ + uint32_t allow_ips1_rcg : 1; /**< 1 is IPS1 RCG is allowed */ + uint32_t allow_ips1z8 : 1; /**< 1 is IPS1 Z8 Retention is allowed */ + uint32_t reserved_bits : 24; /**< Reversed bits */ } bits; uint32_t all; }; @@ -865,7 +885,9 @@ struct dmub_shared_state_ips_fw { uint32_t ips1_exit_count; /**< Exit counter for IPS1 */ uint32_t ips2_entry_count; /**< Entry counter for IPS2 */ uint32_t ips2_exit_count; /**< Exit counter for IPS2 */ - uint32_t reserved[55]; /**< Reversed, to be updated when adding new fields. */ + uint32_t ips1_z8ret_entry_count; /**< Entry counter for IPS1 Z8 Retention */ + uint32_t ips1_z8ret_exit_count; /**< Exit counter for IPS1 Z8 Retention */ + uint32_t reserved[53]; /**< Reversed, to be updated when adding new fields. */ }; /* 248-bytes, fixed */ /** @@ -1253,6 +1275,10 @@ enum dmub_gpint_command { * DESC: Setup debug configs. */ DMUB_GPINT__SETUP_DEBUG_MODE = 136, + /** + * DESC: Initiates IPS wake sequence. + */ + DMUB_GPINT__IPS_DEBUG_WAKE = 137, }; /** @@ -2113,6 +2139,11 @@ union dmub_cmd_fams2_config { } stream_v1; //v1 }; +struct dmub_fams2_config_v2 { + struct dmub_cmd_fams2_global_config global; + struct dmub_fams2_stream_static_state_v1 stream_v1[DMUB_MAX_STREAMS]; //v1 +}; + /** * DMUB rb command definition for FAMS2 (merged SubVP, FPO, Legacy) */ @@ -2121,6 +2152,22 @@ struct dmub_rb_cmd_fams2 { union dmub_cmd_fams2_config config; }; +/** + * Indirect buffer descriptor + */ +struct dmub_ib_data { + union dmub_addr src; // location of indirect buffer in memory + uint16_t size; // indirect buffer size in bytes +}; + +/** + * DMUB rb command definition for commands passed over indirect buffer + */ +struct dmub_rb_cmd_ib { + struct dmub_cmd_header header; + struct dmub_ib_data ib_data; +}; + /** * enum dmub_cmd_idle_opt_type - Idle optimization command type. */ @@ -2144,6 +2191,11 @@ enum dmub_cmd_idle_opt_type { * DCN hardware notify power state. */ DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE = 3, + + /** + * DCN notify to release HW. + */ + DMUB_CMD__IDLE_OPT_RELEASE_HW = 4, }; /** @@ -2636,7 +2688,11 @@ enum dp_hpd_type { /** * DP HPD short pulse */ - DP_IRQ + DP_IRQ = 1, + /** + * Failure to acquire DP HPD state + */ + DP_NONE_HPD = 2 }; /** @@ -2901,8 +2957,9 @@ enum dmub_cmd_fams_type { */ DMUB_CMD__FAMS_SET_MANUAL_TRIGGER = 3, DMUB_CMD__FAMS2_CONFIG = 4, - DMUB_CMD__FAMS2_DRR_UPDATE = 5, - DMUB_CMD__FAMS2_FLIP = 6, + DMUB_CMD__FAMS2_IB_CONFIG = 5, + DMUB_CMD__FAMS2_DRR_UPDATE = 6, + DMUB_CMD__FAMS2_FLIP = 7, }; /** @@ -3609,6 +3666,12 @@ struct dmub_rb_cmd_psr_set_power_opt { struct dmub_cmd_psr_set_power_opt_data psr_set_power_opt_data; }; +enum dmub_alpm_mode { + ALPM_AUXWAKE = 0, + ALPM_AUXLESS = 1, + ALPM_UNSUPPORTED = 2, +}; + /** * Definition of Replay Residency GPINT command. * Bit[0] - Residency mode for Revision 0 @@ -3742,6 +3805,15 @@ enum dmub_cmd_replay_general_subtype { REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE, }; +struct dmub_alpm_auxless_data { + uint16_t lfps_setup_ns; + uint16_t lfps_period_ns; + uint16_t lfps_silence_ns; + uint16_t lfps_t1_t2_override_us; + short lfps_t1_t2_offset_us; + uint8_t lttpr_count; +}; + /** * Data passed from driver to FW in a DMUB_CMD__REPLAY_COPY_SETTINGS command. */ @@ -3812,6 +3884,10 @@ struct dmub_cmd_replay_copy_settings_data { * Use FSM state for Replay power up/down */ uint8_t use_phy_fsm; + /** + * Use for AUX-less ALPM LFPS wake operation + */ + struct dmub_alpm_auxless_data auxless_alpm_data; }; /** @@ -4360,6 +4436,11 @@ enum dmub_cmd_abm_type { * Get the current ACE curve. */ DMUB_CMD__ABM_GET_ACE_CURVE = 10, + + /** + * Get current histogram data + */ + DMUB_CMD__ABM_GET_HISTOGRAM_DATA = 11, }; struct abm_ace_curve { @@ -4953,6 +5034,20 @@ enum dmub_abm_ace_curve_type { ABM_ACE_CURVE_TYPE__SW_IF = 1, }; +/** + * enum dmub_abm_histogram_type - Histogram type. + */ +enum dmub_abm_histogram_type { + /** + * ACE curve as defined by the SW layer. + */ + ABM_HISTOGRAM_TYPE__SW = 0, + /** + * ACE curve as defined by the SW to HW translation interface layer. + */ + ABM_HISTOGRAM_TYPE__SW_IF = 1, +}; + /** * Definition of a DMUB_CMD__ABM_GET_ACE_CURVE command. */ @@ -4988,6 +5083,41 @@ struct dmub_rb_cmd_abm_get_ace_curve { uint8_t pad; }; +/** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ +struct dmub_rb_cmd_abm_get_histogram { + /** + * Command header. + */ + struct dmub_cmd_header header; + + /** + * Address where Histogram should be copied. + */ + union dmub_addr dest; + + /** + * Type of Histogram being queried. + */ + enum dmub_abm_histogram_type histogram_type; + + /** + * Indirect buffer length. + */ + uint16_t bytes; + + /** + * eDP panel instance. + */ + uint8_t panel_inst; + + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad; +}; + /** * Definition of a DMUB_CMD__ABM_SAVE_RESTORE command. */ @@ -5389,7 +5519,8 @@ struct dmub_cmd_fused_request { struct dmub_cmd_fused_request_location_i2c { uint8_t is_aux : 1; // False uint8_t ddc_line : 3; - uint8_t _reserved0 : 4; + uint8_t over_aux : 1; + uint8_t _reserved0 : 3; uint8_t address; uint8_t offset; uint8_t length; @@ -5686,6 +5817,11 @@ union dmub_rb_cmd { */ struct dmub_rb_cmd_abm_get_ace_curve abm_get_ace_curve; + /** + * Definition of a DMUB_CMD__ABM_GET_HISTOGRAM command. + */ + struct dmub_rb_cmd_abm_get_histogram abm_get_histogram; + /** * Definition of a DMUB_CMD__ABM_SET_EVENT command. */ @@ -5817,8 +5953,11 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__PSP_ASSR_ENABLE command. */ struct dmub_rb_cmd_assr_enable assr_enable; + struct dmub_rb_cmd_fams2 fams2_config; + struct dmub_rb_cmd_ib ib_fams2_config; + struct dmub_rb_cmd_fams2_drr_update fams2_drr_update; struct dmub_rb_cmd_fams2_flip fams2_flip; @@ -5934,6 +6073,9 @@ static inline uint32_t dmub_rb_num_free(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* +1 because 1 entry is always unusable */ + data_count += DMUB_RB_CMD_SIZE; + return (rb->capacity - data_count) / DMUB_RB_CMD_SIZE; } @@ -5953,6 +6095,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) else data_count = rb->capacity - (rb->rptr - rb->wrpt); + /* -1 because 1 entry is always unusable */ return (data_count == (rb->capacity - DMUB_RB_CMD_SIZE)); } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index e67f7c4784eb..2575dbc448f7 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -66,24 +66,20 @@ void dmub_dcn401_reset(struct dmub_srv *dmub) const uint32_t timeout_us = 1 * 1000 * 1000; //1s const uint32_t poll_delay_us = 1; //1us uint32_t i = 0; - uint32_t in_reset, scratch, pwait_mode; + uint32_t enabled, in_reset, scratch, pwait_mode; - REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + REG_GET(DMCUB_CNTL, + DMCUB_ENABLE, &enabled); + REG_GET(DMCUB_CNTL2, + DMCUB_SOFT_RESET, &in_reset); - if (in_reset == 0) { + if (enabled && in_reset == 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - for (i = 0; i < timeout_us; i++) { - if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) - break; - - udelay(poll_delay_us); - } - for (; i < timeout_us; i++) { scratch = dmub->hw_funcs.get_gpint_response(dmub); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) @@ -517,28 +513,69 @@ void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { uint32_t *dwords = (uint32_t *)cmd; - + int32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; + uint32_t msg_index; static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); + /* read remaining data based on payload size */ + for (msg_index = 0; msg_index < 15; msg_index++) { + if (payload_size_bytes <= msg_index * 4) { + break; + } + + switch (msg_index) { + case 0: + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); + break; + case 1: + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); + break; + case 2: + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); + break; + case 3: + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); + break; + case 4: + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); + break; + case 5: + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); + break; + case 6: + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); + break; + case 7: + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); + break; + case 8: + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); + break; + case 9: + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); + break; + case 10: + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); + break; + case 11: + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); + break; + case 12: + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); + break; + case 13: + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); + break; + case 14: + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); + break; + } + } + /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY * interrupt. */ - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); } uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) @@ -556,30 +593,39 @@ void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); - dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); } void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); +} + +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) +{ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); } +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); +} + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); @@ -604,11 +650,6 @@ uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) return status; } -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) -{ - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); -} - void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 1:0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index c35be52676f6..88c3a44d67d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -277,11 +277,13 @@ uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index ae8133816b43..acca7943a8c8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -157,6 +157,9 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; + /* default to specifying now inbox type */ + enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; + switch (asic) { case DMUB_ASIC_DCN20: case DMUB_ASIC_DCN21: @@ -395,10 +398,15 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn401_get_current_time; funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; + funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; + funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; + funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; + default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now + funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; @@ -411,6 +419,20 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) return false; } + /* set default inbox type if not overriden */ + if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { + if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { + /* use default inbox type as specified by DCN rev */ + dmub->inbox_type = default_inbox_type; + } else if (funcs->send_reg_inbox0_cmd_msg) { + /* prefer reg as default inbox type if present */ + dmub->inbox_type = DMUB_CMD_INTERFACE_REG; + } else { + /* use fb as fallback */ + dmub->inbox_type = DMUB_CMD_INTERFACE_FB; + } + } + return true; } @@ -426,6 +448,7 @@ enum dmub_status dmub_srv_create(struct dmub_srv *dmub, dmub->asic = params->asic; dmub->fw_version = params->fw_version; dmub->is_virtual = params->is_virtual; + dmub->inbox_type = params->inbox_type; /* Setup asic dependent hardware funcs. */ if (!dmub_srv_hw_setup(dmub, params->asic)) { @@ -695,7 +718,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, inbox1.base = cw4.region.base; inbox1.top = cw4.region.base + DMUB_RB_SIZE; outbox1.base = inbox1.top; - outbox1.top = cw4.region.top; + outbox1.top = inbox1.top + DMUB_RB_SIZE; cw5.offset.quad_part = tracebuff_fb->gpu_addr; cw5.region.base = DMUB_CW5_BASE; @@ -737,7 +760,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, rb_params.ctx = dmub; rb_params.base_address = mail_fb->cpu_addr; rb_params.capacity = DMUB_RB_SIZE; - dmub_rb_init(&dmub->inbox1_rb, &rb_params); + dmub_rb_init(&dmub->inbox1.rb, &rb_params); // Initialize outbox1 ring buffer rb_params.ctx = dmub; @@ -768,27 +791,6 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) -{ - if (!dmub->sw_init) - return DMUB_STATUS_INVALID; - - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); - - if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { - return DMUB_STATUS_HW_FAILURE; - } else { - dmub->inbox1_rb.rptr = rptr; - dmub->inbox1_rb.wrpt = wptr; - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - } - } - - return DMUB_STATUS_OK; -} - enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) { if (!dmub->sw_init) @@ -799,8 +801,13 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) /* mailboxes have been reset in hw, so reset the sw state as well */ dmub->inbox1_last_wptr = 0; - dmub->inbox1_rb.wrpt = 0; - dmub->inbox1_rb.rptr = 0; + dmub->inbox1.rb.wrpt = 0; + dmub->inbox1.rb.rptr = 0; + dmub->inbox1.num_reported = 0; + dmub->inbox1.num_submitted = 0; + dmub->reg_inbox0.num_reported = 0; + dmub->reg_inbox0.num_submitted = 0; + dmub->reg_inbox0.is_pending = 0; dmub->outbox0_rb.wrpt = 0; dmub->outbox0_rb.rptr = 0; dmub->outbox1_rb.wrpt = 0; @@ -811,7 +818,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) @@ -820,18 +827,20 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (dmub->power_state != DMUB_POWER_STATE_D0) return DMUB_STATUS_POWER_STATE_D3; - if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || - dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || + dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { return DMUB_STATUS_HW_FAILURE; } - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) + if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { + dmub->inbox1.num_submitted++; return DMUB_STATUS_OK; + } return DMUB_STATUS_QUEUE_FULL; } -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) { struct dmub_rb flush_rb; @@ -846,13 +855,13 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - flush_rb = dmub->inbox1_rb; + flush_rb = dmub->inbox1.rb; flush_rb.rptr = dmub->inbox1_last_wptr; dmub_rb_flush_pending(&flush_rb); - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; return DMUB_STATUS_OK; } @@ -910,26 +919,84 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } +static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) +{ + if (dmub->reg_inbox0.is_pending) { + dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (!dmub->reg_inbox0.is_pending) { + /* ack the rsp interrupt */ + if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + + /* only update the reported count if commands aren't being batched */ + if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { + dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; + } + } + } +} + +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us) +{ + uint32_t i; + const uint32_t polling_interval_us = 1; + struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; + struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; + const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; + const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; + + if (!dmub->hw_init || + !dmub->hw_funcs.get_inbox1_wptr) + return DMUB_STATUS_INVALID; + + for (i = 0; i <= timeout_us; i += polling_interval_us) { + scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); + scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && + dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + /* check current HW state first, but use command submission vs reported as a fallback */ + if ((dmub_rb_empty(&scratch_inbox1.rb) || + inbox1->num_reported >= scratch_inbox1.num_submitted) && + (!scratch_reg_inbox0.is_pending || + reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; +} + enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { - uint32_t i, rptr; + enum dmub_status status; + uint32_t i; + const uint32_t polling_interval_us = 1; if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i <= timeout_us; ++i) { - rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + for (i = 0; i < timeout_us; i += polling_interval_us) { + status = dmub_srv_update_inbox_status(dmub); - if (rptr > dmub->inbox1_rb.capacity) - return DMUB_STATUS_HW_FAILURE; + if (status != DMUB_STATUS_OK) + return status; - dmub->inbox1_rb.rptr = rptr; - - if (dmub_rb_empty(&dmub->inbox1_rb)) + /* check for idle */ + if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) return DMUB_STATUS_OK; - udelay(1); + udelay(polling_interval_us); } return DMUB_STATUS_TIMEOUT; @@ -1040,35 +1107,6 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd) -{ - enum dmub_status status = DMUB_STATUS_OK; - - // Queue command - status = dmub_srv_cmd_queue(dmub, cmd); - - if (status != DMUB_STATUS_OK) - return status; - - // Execute command - status = dmub_srv_cmd_execute(dmub); - - if (status != DMUB_STATUS_OK) - return status; - - // Wait for DMUB to process command - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) - return status; - - // Copy data back from ring buffer into command - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); - - return status; -} - static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, void *entry) { @@ -1160,43 +1198,6 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ } } - -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us) -{ - uint32_t rsp_ready = 0; - uint32_t i; - - dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); - - for (i = 0; i < timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready) - break; - udelay(1); - } - if (rsp_ready == 0) - return DMUB_STATUS_TIMEOUT; - - if (with_reply) - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); - - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); - - /* wait for rsp int status is cleared to initial state before exit */ - for (; i <= timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready == 0) - break; - udelay(1); - } - ASSERT(rsp_ready == 0); - - return DMUB_STATUS_OK; -} - void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) { if (!dmub || !dmub->hw_init) @@ -1204,3 +1205,155 @@ void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_t dmub->power_state = dmub_srv_power_state; } + +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) +{ + uint32_t num_pending = 0; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + + if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || + !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) + return DMUB_STATUS_INVALID; + + if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) + num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; + else + /* num_submitted wrapped */ + num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - + (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); + + if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) + return DMUB_STATUS_QUEUE_FULL; + + /* clear last rsp ack and send message */ + dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); + dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); + + dmub->reg_inbox0.num_submitted++; + dmub->reg_inbox0.is_pending = true; + dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; + + return DMUB_STATUS_OK; +} + +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp) +{ + if (dmub) { + if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && + dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); + } else { + dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); + } + } +} + +static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) +{ + if (!dmub || !dmub->sw_init) + return DMUB_STATUS_INVALID; + + dmub->reg_inbox0.is_pending = 0; + dmub->reg_inbox0.is_multi_pending = 0; + + return DMUB_STATUS_OK; +} + +static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1.rb.rptr = rptr; + dmub->inbox1.rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; + } + } + + return DMUB_STATUS_OK; +} + +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub) +{ + enum dmub_status status; + + status = dmub_srv_sync_reg_inbox0(dmub); + if (status != DMUB_STATUS_OK) + return status; + + status = dmub_srv_sync_inbox1(dmub); + if (status != DMUB_STATUS_OK) + return status; + + return DMUB_STATUS_OK; +} + +enum dmub_status dmub_srv_wait_for_inbox_free(struct dmub_srv *dmub, + uint32_t timeout_us, + uint32_t num_free_required) +{ + enum dmub_status status; + uint32_t i; + const uint32_t polling_interval_us = 1; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + for (i = 0; i < timeout_us; i += polling_interval_us) { + status = dmub_srv_update_inbox_status(dmub); + + if (status != DMUB_STATUS_OK) + return status; + + /* check for space in inbox1 */ + if (dmub_rb_num_free(&dmub->inbox1.rb) >= num_free_required) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; +} + +enum dmub_status dmub_srv_update_inbox_status(struct dmub_srv *dmub) +{ + uint32_t rptr; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + + /* update inbox1 state */ + rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + if (dmub->inbox1.rb.rptr > rptr) { + /* rb wrapped */ + dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } else { + dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } + dmub->inbox1.rb.rptr = rptr; + + /* update reg_inbox0 */ + dmub_srv_update_reg_inbox0_status(dmub); + + return DMUB_STATUS_OK; +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index cce887cefc01..567c5b1aeb7a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -95,23 +95,6 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, case DMUB_OUT_CMD__DPIA_NOTIFICATION: notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION; notify->link_index = cmd.dpia_notification.payload.header.instance; - - if (cmd.dpia_notification.payload.header.type == DPIA_NOTIFY__BW_ALLOCATION) { - - notify->dpia_notification.payload.data.dpia_bw_alloc.estimated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.estimated_bw; - notify->dpia_notification.payload.data.dpia_bw_alloc.allocated_bw = - cmd.dpia_notification.payload.data.dpia_bw_alloc.allocated_bw; - - if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_failed) - notify->result = DPIA_BW_REQ_FAILED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_request_succeeded) - notify->result = DPIA_BW_REQ_SUCCESS; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.est_bw_changed) - notify->result = DPIA_EST_BW_CHANGED; - else if (cmd.dpia_notification.payload.data.dpia_bw_alloc.bits.bw_alloc_cap_changed) - notify->result = DPIA_BW_ALLOC_CAPS_CHANGED; - } break; case DMUB_OUT_CMD__HPD_SENSE_NOTIFY: notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY; @@ -119,6 +102,10 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, &cmd.hpd_sense_notify.data, sizeof(cmd.hpd_sense_notify.data)); break; + case DMUB_OUT_CMD__FUSED_IO: + notify->type = DMUB_NOTIFICATION_FUSED_IO; + dmub_memcpy(¬ify->fused_request, &cmd.fused_io.request, sizeof(cmd.fused_io.request)); + break; default: notify->type = DMUB_NOTIFICATION_NO_DATA; break; diff --git a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h index 7e3240e73c1f..63813009a3a6 100644 --- a/drivers/gpu/drm/amd/display/include/gpio_service_interface.h +++ b/drivers/gpu/drm/amd/display/include/gpio_service_interface.h @@ -86,6 +86,9 @@ enum dc_irq_source dal_irq_get_source( enum dc_irq_source dal_irq_get_rx_source( const struct gpio *irq); +enum dc_irq_source dal_irq_get_read_request( + const struct gpio *irq); + enum gpio_result dal_irq_setup_hpd_filter( struct gpio *irq, struct gpio_hpd_config *config); diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h index 1867aac57cf2..da74ed66c8f9 100644 --- a/drivers/gpu/drm/amd/display/include/link_service_types.h +++ b/drivers/gpu/drm/amd/display/include/link_service_types.h @@ -89,6 +89,8 @@ struct link_training_settings { bool enhanced_framing; enum lttpr_mode lttpr_mode; + bool lttpr_early_tps2; + /* disallow different lanes to have different lane settings */ bool disallow_per_lane_settings; /* dpcd lane settings will always use the same hw lane settings diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 55c7d873175f..a37634942b07 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -386,6 +386,7 @@ enum mod_hdcp_status mod_hdcp_write_repeater_auth_ack(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_stream_manage(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_write_content_type(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp); +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp); /* hdcp version helpers */ static inline uint8_t is_dp_hdcp(struct mod_hdcp *hdcp) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 1d41dd58f6bc..bb8ae80b37f8 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -452,21 +452,12 @@ out: return status; } -static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, +static enum mod_hdcp_status locality_check_sw(struct mod_hdcp *hdcp, struct mod_hdcp_event_context *event_ctx, struct mod_hdcp_transition_input_hdcp2 *input) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { - event_ctx->unexpected_event = 1; - goto out; - } - - if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, - &input->lc_init_prepare, &status, - hdcp, "lc_init_prepare")) - goto out; if (!mod_hdcp_execute_and_set(mod_hdcp_write_lc_init, &input->lc_init_write, &status, hdcp, "lc_init_write")) @@ -482,6 +473,48 @@ static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, &input->l_prime_read, &status, hdcp, "l_prime_read")) goto out; +out: + return status; +} + +static enum mod_hdcp_status locality_check_fw(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + + if (!mod_hdcp_execute_and_set(mod_hdcp_write_poll_read_lc_fw, + &input->l_prime_read, &status, + hdcp, "l_prime_read")) + goto out; + +out: + return status; +} + +static enum mod_hdcp_status locality_check(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp2 *input) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !hdcp->connection.link.adjust.hdcp2.force_sw_locality_check; + + if (event_ctx->event != MOD_HDCP_EVENT_CALLBACK) { + event_ctx->unexpected_event = 1; + goto out; + } + + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_prepare_lc_init, + &input->lc_init_prepare, &status, + hdcp, "lc_init_prepare")) + goto out; + + status = (use_fw ? locality_check_fw : locality_check_sw)(hdcp, event_ctx, input); + if (status != MOD_HDCP_STATUS_SUCCESS) + goto out; + if (!mod_hdcp_execute_and_set(mod_hdcp_hdcp2_validate_l_prime, &input->l_prime_validation, &status, hdcp, "l_prime_validation")) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c index c5f6c11de7e5..89ffb89e1932 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c @@ -184,17 +184,28 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, H2_A2_LOCALITY_CHECK); break; - case H2_A2_LOCALITY_CHECK: + case H2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_i2c + && !adjust->hdcp2.force_sw_locality_check; + + /* + * 1A-05: consider disconnection after LC init a failure + * 1A-13-1: consider invalid l' a failure + * 1A-13-2: consider l' timeout a failure + */ if (hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_available_poll != PASS || - input->l_prime_read != PASS) { - /* - * 1A-05: consider disconnection after LC init a failure - * 1A-13-1: consider invalid l' a failure - * 1A-13-2: consider l' timeout a failure - */ + (!use_fw && input->lc_init_write != PASS) || + (!use_fw && input->l_prime_available_poll != PASS)) { + fail_and_restart_in_ms(0, &status, output); + break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + fail_and_restart_in_ms(0, &status, output); break; } else if (input->l_prime_validation != PASS) { @@ -205,6 +216,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case H2_A3_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { @@ -498,12 +510,23 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A2_LOCALITY_CHECK); break; - case D2_A2_LOCALITY_CHECK: + case D2_A2_LOCALITY_CHECK: { + const bool use_fw = hdcp->config.ddc.funcs.atomic_write_poll_read_aux + && !adjust->hdcp2.force_sw_locality_check; + if (hdcp->state.stay_count > 10 || input->lc_init_prepare != PASS || - input->lc_init_write != PASS || - input->l_prime_read != PASS) { + (!use_fw && input->lc_init_write != PASS)) { /* 1A-12: consider invalid l' a failure */ + fail_and_restart_in_ms(0, &status, output); + break; + } else if (input->l_prime_read != PASS) { + if (use_fw && hdcp->config.debug.lc_enable_sw_fallback) { + adjust->hdcp2.force_sw_locality_check = true; + callback_in_ms(0, output); + break; + } + fail_and_restart_in_ms(0, &status, output); break; } else if (input->l_prime_validation != PASS) { @@ -514,6 +537,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, callback_in_ms(0, output); set_state_id(hdcp, output, D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER); break; + } case D2_A34_EXCHANGE_KS_AND_TEST_FOR_REPEATER: if (input->eks_prepare != PASS || input->eks_write != PASS) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 6e064e6ae949..2e6408579194 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -688,3 +688,76 @@ enum mod_hdcp_status mod_hdcp_clear_cp_irq_status(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_INVALID_OPERATION; } + +static bool write_stall_read_lc_fw_aux(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + + struct mod_hdcp_atomic_op_aux write = { + hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT], + hdcp2->lc_init + 1, + sizeof(hdcp2->lc_init) - 1, + }; + struct mod_hdcp_atomic_op_aux stall = { 0, NULL, 0, }; + struct mod_hdcp_atomic_op_aux read = { + hdcp_dpcd_addrs[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime + 1, + sizeof(hdcp2->lc_l_prime) - 1, + }; + + hdcp2->lc_l_prime[0] = HDCP_2_2_LC_SEND_LPRIME; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_aux( + hdcp->config.ddc.handle, + &write, + &stall, + &read, + 16 * 1000, + 0 + ); +} + +static bool write_poll_read_lc_fw_i2c(struct mod_hdcp *hdcp) +{ + struct mod_hdcp_message_hdcp2 *hdcp2 = &hdcp->auth.msg.hdcp2; + uint8_t expected_rxstatus[2] = { sizeof(hdcp2->lc_l_prime) }; + + hdcp->buf[0] = hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_WRITE_LC_INIT]; + memmove(&hdcp->buf[1], hdcp2->lc_init, sizeof(hdcp2->lc_init)); + + struct mod_hdcp_atomic_op_i2c write = { + HDCP_I2C_ADDR, + 0, + hdcp->buf, + sizeof(hdcp2->lc_init) + 1, + }; + struct mod_hdcp_atomic_op_i2c poll = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_RXSTATUS], + expected_rxstatus, + sizeof(expected_rxstatus), + }; + struct mod_hdcp_atomic_op_i2c read = { + HDCP_I2C_ADDR, + hdcp_i2c_offsets[MOD_HDCP_MESSAGE_ID_READ_LC_SEND_L_PRIME], + hdcp2->lc_l_prime, + sizeof(hdcp2->lc_l_prime), + }; + + return hdcp->config.ddc.funcs.atomic_write_poll_read_i2c( + hdcp->config.ddc.handle, + &write, + &poll, + &read, + 20 * 1000, + 6 + ); +} + +enum mod_hdcp_status mod_hdcp_write_poll_read_lc_fw(struct mod_hdcp *hdcp) +{ + const bool success = (is_dp_hdcp(hdcp) ? write_stall_read_lc_fw_aux : write_poll_read_lc_fw_i2c)(hdcp); + + return success ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_DDC_FAILURE; +} + diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index a4d344a4db9e..c42468bb70ac 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -133,9 +133,22 @@ enum mod_hdcp_display_disable_option { MOD_HDCP_DISPLAY_DISABLE_ENCRYPTION, }; +struct mod_hdcp_atomic_op_i2c { + uint8_t address; + uint8_t offset; + uint8_t *data; + uint32_t size; +}; + +struct mod_hdcp_atomic_op_aux { + uint32_t address; + uint8_t *data; + uint32_t size; +}; + struct mod_hdcp_ddc { void *handle; - struct { + struct mod_hdcp_ddc_funcs { bool (*read_i2c)(void *handle, uint32_t address, uint8_t offset, @@ -153,6 +166,22 @@ struct mod_hdcp_ddc { uint32_t address, const uint8_t *data, uint32_t size); + bool (*atomic_write_poll_read_i2c)( + void *handle, + const struct mod_hdcp_atomic_op_i2c *write, + const struct mod_hdcp_atomic_op_i2c *poll, + struct mod_hdcp_atomic_op_i2c *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); + bool (*atomic_write_poll_read_aux)( + void *handle, + const struct mod_hdcp_atomic_op_aux *write, + const struct mod_hdcp_atomic_op_aux *poll, + struct mod_hdcp_atomic_op_aux *read, + uint32_t poll_timeout_us, + uint8_t poll_mask_msb + ); } funcs; }; @@ -185,7 +214,8 @@ struct mod_hdcp_link_adjustment_hdcp2 { uint8_t force_type : 2; uint8_t force_no_stored_km : 1; uint8_t increase_h_prime_timeout: 1; - uint8_t reserved : 3; + uint8_t force_sw_locality_check : 1; + uint8_t reserved : 2; }; struct mod_hdcp_link_adjustment { @@ -272,6 +302,10 @@ struct mod_hdcp_display_query { struct mod_hdcp_config { struct mod_hdcp_psp psp; struct mod_hdcp_ddc ddc; + struct { + uint8_t lc_enable_sw_fallback : 1; + uint8_t reserved : 7; + } debug; uint8_t index; }; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 4c95b885d1d0..c8eccee9b023 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -366,7 +366,7 @@ enum DC_DEBUG_MASK { DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, /** - * @DC_HDCP_LC_ENABLE_SW_FALLBACK If set, upon HDCP Locality Check FW + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW * path failure, retry using legacy SW path. */ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h index bd8085ec54ed..2d6a598a6c25 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h @@ -5242,6 +5242,8 @@ #define DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT 0x0000000c #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE_MASK 0x00000003L #define DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT 0x00000000 +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE_MASK 0x00000300L +#define DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT 0x00000008 #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE_MASK 0x00000030L #define DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT 0x00000004 #define DENORM_CONTROL__DENORM_MODE_MASK 0x00000007L diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h index c75aee25619e..6f44345277af 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h @@ -1779,6 +1779,8 @@ #define mmRLC_TTOP_D 0x3105 #define mmRLC_CLEAR_STATE_RESTORE_BASE 0x30C8 #define mmRLC_PG_AO_CU_MASK 0x310B +#define mmSPI_STATIC_THREAD_MGMT_1 0x2438 +#define mmSPI_STATIC_THREAD_MGMT_2 0x2439 #define mmSPI_STATIC_THREAD_MGMT_3 0x243A #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h index edc8a793a95d..4dd386b98748 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h @@ -234,6 +234,26 @@ #define mmIH_RB_WPTR_ADDR_HI 0x0F84 #define mmIH_RB_WPTR_ADDR_LO 0x0F85 #define mmIH_STATUS 0x0F88 + +#define mmDMA_GFX_RB_CNTL 0x3400 +#define mmDMA_GFX_RB_BASE 0x3401 +#define mmDMA_GFX_RB_RPTR 0x3402 +#define mmDMA_GFX_RB_WPTR 0x3403 +#define mmDMA_GFX_RB_RPTR_ADDR_HI 0x3407 +#define mmDMA_GFX_RB_RPTR_ADDR_LO 0x3408 +#define mmDMA_GFX_IB_CNTL 0x3409 +#define mmDMA_GFX_IB_RPTR 0x340a +#define mmDMA_CNTL 0x340b +#define mmDMA_STATUS_REG 0x340D +#define mmDMA_TILING_CONFIG 0x342E +#define mmDMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411 +#define mmDMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412 +#define mmDMA_POWER_CNTL 0x342F +#define mmDMA_CLK_CTRL 0x3430 +#define mmDMA_PG 0x3435 +#define mmDMA_PGFSM_CONFIG 0x3436 +#define mmDMA_PGFSM_WRITE 0x3437 + #define mmSEM_MAILBOX 0x0F9B #define mmSEM_MAILBOX_CLIENTCONFIG 0x0F9A #define mmSEM_MAILBOX_CONTROL 0x0F9C @@ -269,7 +289,4 @@ #define mmVCE_CONFIG 0x0F94 #define mmXDMA_MSTR_MEM_OVERFLOW_CNTL 0x03F8 -/* from the old sid.h */ -#define mmDMA_TILING_CONFIG 0x342E - #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h index 1c540fe136cb..9f7fc2428b69 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h @@ -823,6 +823,43 @@ #define LX3__RESERVED__SHIFT 0x00000000 #define RINGOSC_MASK__MASK_MASK 0x0000ffffL #define RINGOSC_MASK__MASK__SHIFT 0x00000000 + +#define DMA_CNTL__TRAP_ENABLE_MASK 0x00000001L +#define DMA_CNTL__TRAP_ENABLE__SHIFT 0x00000000 +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE_MASK 0x00000002L +#define DMA_CNTL__SEM_INCOMPLETE_INT_ENABLE__SHIFT 0x00000001 +#define DMA_CNTL__SEM_WAIT_INT_ENABLE_MASK 0x00000004L +#define DMA_CNTL__SEM_WAIT_INT_ENABLE__SHIFT 0x00000002 +#define DMA_CNTL__DATA_SWAP_ENABLE_MASK 0x00000008L +#define DMA_CNTL__DATA_SWAP_ENABLE__SHIFT 0x00000003 +#define DMA_CNTL__FENCE_SWAP_ENABLE_MASK 0x00000010L +#define DMA_CNTL__FENCE_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK 0x10000000L +#define DMA_CNTL__CTXEMPTY_INT_ENABLE__SHIFT 0x0000001C +#define DMA_GFX_RB_CNTL__RB_ENABLE_MASK 0x00000001L +#define DMA_GFX_RB_CNTL__RB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_RB_CNTL__RB_SIZE__SHIFT 0x00000001 +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK 0x00000200L +#define DMA_GFX_RB_CNTL__RB_SWAP_ENABLE__SHIFT 0x00000009 +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK 0x00001000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT 0x0000000C +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK 0x00002000L +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE__SHIFT 0x0000000D +#define DMA_GFX_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT 0x00000010 +#define DMA_GFX_IB_CNTL__IB_ENABLE_MASK 0x00000001L +#define DMA_GFX_IB_CNTL__IB_ENABLE__SHIFT 0x00000000 +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK 0x00000010L +#define DMA_GFX_IB_CNTL__IB_SWAP_ENABLE__SHIFT 0x00000004 +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK 0x80000000L +#define DMA_GFX_IB_CNTL__CMD_VMID_FORCE__SHIFT 0x0000001F + +#define DMA_STATUS_REG__IDLE_MASK 0x00000001L +#define DMA_STATUS_REG__IDLE__SHIFT 0x00000000 +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK 0x00000100L +#define DMA_POWER_CNTL__MEM_POWER_OVERRIDE__SHIFT 0x00000008 +#define DMA_PG__PG_CNTL_ENABLE_MASK 0x00000001L +#define DMA_PG__PG_CNTL_ENABLE__SHIFT 0x00000000 + #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0_MASK 0x00000007L #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0__SHIFT 0x00000000 #define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT1_MASK 0x00000038L @@ -1015,6 +1052,10 @@ #define SRBM_STATUS2__VCE_BUSY__SHIFT 0x00000007 #define SRBM_STATUS2__VCE_RQ_PENDING_MASK 0x00000008L #define SRBM_STATUS2__VCE_RQ_PENDING__SHIFT 0x00000003 +#define SRBM_STATUS2__DMA_BUSY_MASK 0x00000020L +#define SRBM_STATUS2__DMA_BUSY__SHIFT 0x00000005 +#define SRBM_STATUS2__DMA1_BUSY_MASK 0x00000040L +#define SRBM_STATUS2__DMA1_BUSY__SHIFT 0x00000006 #define SRBM_STATUS2__XDMA_BUSY_MASK 0x00000100L #define SRBM_STATUS2__XDMA_BUSY__SHIFT 0x00000008 #define SRBM_STATUS2__XSP_BUSY_MASK 0x00000010L diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h index 6b10be61efc3..bdef1f743df7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h @@ -41,7 +41,49 @@ #define ixLCAC_MC5_CNTL 0x012B #define ixLCAC_MC5_OVR_SEL 0x012C #define ixLCAC_MC5_OVR_VAL 0x012D + +#define mmCG_SPLL_FUNC_CNTL 0x0180 +#define mmCG_SPLL_FUNC_CNTL_2 0x0181 +#define mmCG_SPLL_FUNC_CNTL_3 0x0182 +#define mmCG_SPLL_FUNC_CNTL_4 0x0183 +#define mmCG_SPLL_STATUS 0x0185 +#define mmSPLL_CNTL_MODE 0x0186 +#define mmCG_SPLL_SPREAD_SPECTRUM 0x0188 +#define mmCG_SPLL_SPREAD_SPECTRUM_2 0x0189 +#define mmCG_SPLL_AUTOSCALE_CNTL 0x018B +#define mmMPLL_BYPASSCLK_SEL 0x0197 +#define mmCG_CLKPIN_CNTL 0x0198 +#define mmCG_CLKPIN_CNTL_2 0x0199 +#define mmTHM_CLK_CNTL 0x019B +#define mmMISC_CLK_CNTL 0x019C +#define mmCG_THERMAL_CTRL 0x01C0 +#define mmCG_THERMAL_STATUS 0x01C1 +#define mmCG_THERMAL_INT 0x01C2 +#define mmCG_MULT_THERMAL_CTRL 0x01C4 +#define mmCG_MULT_THERMAL_STATUS 0x01C5 +#define mmCG_FDO_CTRL0 0x01D5 +#define mmCG_FDO_CTRL1 0x01D6 +#define mmCG_FDO_CTRL2 0x01D7 +#define mmCG_TACH_CTRL 0x01DC +#define mmCG_TACH_STATUS 0x01DD +#define mmGENERAL_PWRMGT 0x1E0 +#define mmCG_TPC 0x1E1 +#define mmSCLK_PWRMGT_CNTL 0x1E2 +#define mmTARGET_AND_CURRENT_PROFILE_INDEX 0x01E6 +#define mmCG_FTV 0x01EF +#define mmCG_FFCT_0 0x01F0 +#define mmCG_BSP 0x01FF +#define mmCG_AT 0x0200 +#define mmCG_GIT 0x0201 +#define mmCG_SSP 0x0203 +#define mmCG_DISPLAY_GAP_CNTL 0x020A +#define mmCG_ULV_CONTROL 0x021E +#define mmCG_ULV_PARAMETER 0x021F +#define mmSMC_SCRATCH0 0x0221 +#define mmCG_CAC_CTRL 0x022E + #define ixSMC_PC_C 0x80000370 + #define ixTHM_TMON0_DEBUG 0x03F0 #define ixTHM_TMON0_INT_DATA 0x0380 #define ixTHM_TMON0_RDIL0_DATA 0x0300 @@ -110,6 +152,7 @@ #define ixTHM_TMON1_RDIR7_DATA 0x0337 #define ixTHM_TMON1_RDIR8_DATA 0x0338 #define ixTHM_TMON1_RDIR9_DATA 0x0339 + #define mmGPIOPAD_A 0x05E7 #define mmGPIOPAD_EN 0x05E8 #define mmGPIOPAD_EXTERN_TRIG_CNTL 0x05F1 @@ -127,6 +170,7 @@ #define mmGPIOPAD_STRENGTH 0x05E5 #define mmGPIOPAD_SW_INT_STAT 0x05E4 #define mmGPIOPAD_Y 0x05E9 + #define mmSMC_IND_ACCESS_CNTL 0x008A #define mmSMC_IND_DATA_0 0x0081 #define mmSMC_IND_DATA 0x0081 diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h index 7d3925b7266e..67d3c7e13a48 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h @@ -23,10 +23,142 @@ #ifndef SMU_6_0_SH_MASK_H #define SMU_6_0_SH_MASK_H -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03ffffffL -#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 -#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003f0L +#define CG_AT__CG_R_MASK 0x0000FFFFL +#define CG_AT__CG_R__SHIFT 0x00000000 +#define CG_AT__CG_L_MASK 0xFFFF0000L +#define CG_AT__CG_L__SHIFT 0x00000010 + +#define CG_BSP__BSP_MASK 0x0000FFFFL +#define CG_BSP__BSP__SHIFT 0x00000000 +#define CG_BSP__BSU_MASK 0x000F0000L +#define CG_BSP__BSU__SHIFT 0x00000010 + +#define CG_CAC_CTRL__CAC_WINDOW_MASK 0x00FFFFFFL +#define CG_CAC_CTRL__CAC_WINDOW__SHIFT 0x00000000 + +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK 0x00000002L +#define CG_CLKPIN_CNTL__XTALIN_DIVIDE__SHIFT 0x00000001 +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK 0x00000004L +#define CG_CLKPIN_CNTL__BCLK_AS_XCLK__SHIFT 0x00000002 +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK 0x00000008L +#define CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN__SHIFT 0x00000003 +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK 0x00000100L +#define CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK__SHIFT 0x00000008 + +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK 0x00000003L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT 0x00000000 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK 0x0000000CL +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT 0x00000002 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT_MASK 0x0003FFF0L +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_COUNT__SHIFT 0x00000004 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT_MASK 0x00700000 +#define CG_DISPLAY_GAP_CNTL__VBI_TIMER_UNIT__SHIFT 0x00000014 +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK 0x03000000L +#define CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT 0x00000018 +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK 0x0C000000L +#define CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT 0x0000001A + +#define CG_FFCT_0__UTC_0_MASK 0x000003FFL +#define CG_FFCT_0__UTC_0__SHIFT 0x00000000 +#define CG_FFCT_0__DTC_0_MASK 0x000FFC00L +#define CG_FFCT_0__DTC_0__SHIFT 0x0000000A + +#define CG_GIT__CG_GICST_MASK 0x0000FFFFL +#define CG_GIT__CG_GICST__SHIFT 0x00000000 +#define CG_GIT__CG_GIPOT_MASK 0xFFFF0000L +#define CG_GIT__CG_GIPOT__SHIFT 0x00000010 + +#define CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK 0x00000001L +#define CG_SPLL_FUNC_CNTL__SPLL_RESET__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK 0x00000002L +#define CG_SPLL_FUNC_CNTL__SPLL_SLEEP__SHIFT 0x00000001 +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK 0x00000008L +#define CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN__SHIFT 0x00000003 +#define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK 0x000003F0L #define CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT 0x00000004 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK 0x007F00000 +#define CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT 0x00000014 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK 0x0000001FF +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK 0x00800000 +#define CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG__SHIFT 0x00000017 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK 0x04000000 +#define CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE__SHIFT 0x0000001A +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK 0x03FFFFFFL +#define CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT 0x00000000 +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK 0x10000000L +#define CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN__SHIFT 0x0000001C +#define CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK 0x00000002L +#define CG_SPLL_STATUS__SPLL_CHG_STATUS__SHIFT 0x00000001 +#define CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK 0x00000001L +#define CG_SPLL_SPREAD_SPECTRUM__SSEN__SHIFT 0x00000000 +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK 0x0000FFF0L +#define CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT 0x00000004 +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK 0x00000200L +#define CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT 0x00000000 +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK 0x03FFFFFFL +#define CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR__SHIFT 0x00000009 + +#define CG_SSP__SST_MASK 0x0000FFFFL +#define CG_SSP__SST__SHIFT 0x00000000 +#define CG_SSP__SSTU_MASK 0x000F0000L +#define CG_SSP__SSTU__SHIFT 0x00000010 + +#define CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK 0x00000007L +#define CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT 0x00000000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM_MASK 0x003FC000 +#define CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT 0x0000000E +#define CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK 0x0001FE00L +#define CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT 0x00000009 +#define CG_THERMAL_INT__DIG_THERM_INTH_MASK 0x0000FF00L +#define CG_THERMAL_INT__DIG_THERM_INTH__SHIFT 0x00000008 +#define CG_THERMAL_INT__DIG_THERM_INTL_MASK 0x00FF0000L +#define CG_THERMAL_INT__DIG_THERM_INTL__SHIFT 0x00000010 +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK 0x01000000L +#define CG_THERMAL_INT__THERM_INT_MASK_HIGH__SHIFT 0x00000018 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK 0x02000000 +#define CG_THERMAL_INT__THERM_INT_MASK_LOW__SHIFT 0x00000019 + +#define CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK 0x0FF00000L +#define CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT 0x00000014 +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP_MASK 0x000001FFL +#define CG_MULT_THERMAL_STATUS__ASIC_MAX_TEMP__SHIFT 0x00000000 +#define CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK 0x0003fe00L +#define CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT 0x00000009 + +#define CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK 0x000000FFL +#define CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT 0x00000000 +#define CG_FDO_CTRL1__FMAX_DUTY100_MASK 0x000000FFL +#define CG_FDO_CTRL1__FMAX_DUTY100__SHIFT 0x00000000 +#define CG_FDO_CTRL2__TMIN_MASK 0x000000FFL +#define CG_FDO_CTRL2__TMIN__SHIFT 0x00000000 +#define CG_FDO_CTRL2__FDO_PWM_MODE_MASK 0x00003800L +#define CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT 0x0000000B +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK 0xFE000000L +#define CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT 0x00000019 + +#define CG_TACH_CTRL__EDGE_PER_REV_MASK 0x00000007L +#define CG_TACH_CTRL__EDGE_PER_REV__SHIFT 0x00000000 +#define CG_TACH_CTRL__TARGET_PERIOD_MASK 0xFFFFFFF8L +#define CG_TACH_CTRL__TARGET_PERIOD__SHIFT 0x00000003 +#define CG_TACH_STATUS__TACH_PERIOD_MASK 0xFFFFFFFFL +#define CG_TACH_STATUS__TACH_PERIOD__SHIFT 0x00000000 + +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK 0x00000001L +#define GENERAL_PWRMGT__GLOBAL_PWRMGT_EN__SHIFT 0x00000000 +#define GENERAL_PWRMGT__STATIC_PM_EN_MASK 0x00000002L +#define GENERAL_PWRMGT__STATIC_PM_EN__SHIFT 0x00000001 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK 0x00000004L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_DIS__SHIFT 0x00000002 +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE_MASK 0x00000008L +#define GENERAL_PWRMGT__THERMAL_PROTECTION_TYPE__SHIFT 0x00000003 +#define GENERAL_PWRMGT__SW_SMIO_INDEX_MASK 0x00000040L +#define GENERAL_PWRMGT__SW_SMIO_INDEX__SHIFT 0x00000006 +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK 0x00000400L +#define GENERAL_PWRMGT__VOLT_PWRMGT_EN__SHIFT 0x0000000A +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK 0x00800000L +#define GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN__SHIFT 0x00000017 + #define GPIOPAD_A__GPIO_A_MASK 0x7fffffffL #define GPIOPAD_A__GPIO_A__SHIFT 0x00000000 #define GPIOPAD_EN__GPIO_EN_MASK 0x7fffffffL @@ -195,6 +327,7 @@ #define GPIOPAD_SW_INT_STAT__SW_INT_STAT__SHIFT 0x00000000 #define GPIOPAD_Y__GPIO_Y_MASK 0x7fffffffL #define GPIOPAD_Y__GPIO_Y__SHIFT 0x00000000 + #define LCAC_MC0_CNTL__MC0_ENABLE_MASK 0x00000001L #define LCAC_MC0_CNTL__MC0_ENABLE__SHIFT 0x00000000 #define LCAC_MC0_CNTL__MC0_THRESHOLD_MASK 0x0001fffeL @@ -243,6 +376,37 @@ #define LCAC_MC5_OVR_SEL__MC5_OVR_SEL__SHIFT 0x00000000 #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL_MASK 0xffffffffL #define LCAC_MC5_OVR_VAL__MC5_OVR_VAL__SHIFT 0x00000000 + +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK 0x0000FF00L +#define MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT 0x00000008 + +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK 0x00000001L +#define SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF__SHIFT 0x00000000 +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1_MASK 0x00000002L +#define SCLK_PWRMGT_CNTL__SCLK_LOW_D1__SHIFT 0x00000001 +#define SCLK_PWRMGT_CNTL__FIR_RESET_MASK 0x00000010L +#define SCLK_PWRMGT_CNTL__FIR_RESET__SHIFT 0x00000004 +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK 0x00000020L +#define SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL__SHIFT 0x00000005 +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK 0x00000040L +#define SCLK_PWRMGT_CNTL__FIR_TREND_MODE__SHIFT 0x00000006 +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN_MASK 0x00000080L +#define SCLK_PWRMGT_CNTL__DYN_GFX_CLK_OFF_EN__SHIFT 0x00000007 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON_MASK 0x00000100L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_ON__SHIFT 0x00000008 +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF_MASK 0x00000200L +#define SCLK_PWRMGT_CNTL__GFX_CLK_REQUEST_OFF__SHIFT 0x00000009 +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF_MASK 0x00000400L +#define SCLK_PWRMGT_CNTL__GFX_CLK_FORCE_OFF__SHIFT 0x0000000A +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1_MASK 0x00000800L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D1__SHIFT 0x0000000B +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2_MASK 0x00001000L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D2__SHIFT 0x0000000C +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3_MASK 0x00002000L +#define SCLK_PWRMGT_CNTL__GFX_CLK_OFF_ACPI_D3__SHIFT 0x0000000D +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN_MASK 0x00004000L +#define SCLK_PWRMGT_CNTL__DYN_LIGHT_SLEEP_EN__SHIFT 0x0000000E + #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK 0x00000001L #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0__SHIFT 0x00000000 #define SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_1_MASK 0x00000100L @@ -285,6 +449,7 @@ #define SMC_RESP_1__SMC_RESP__SHIFT 0x00000000 #define SMC_RESP_2__SMC_RESP_MASK 0xffffffffL #define SMC_RESP_2__SMC_RESP__SHIFT 0x00000000 + #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT_MASK 0x000ff000L #define SPLL_CNTL_MODE__SPLL_CTLREQ_DLY_CNT__SHIFT 0x0000000c #define SPLL_CNTL_MODE__SPLL_ENSAT_MASK 0x00000010L @@ -293,6 +458,8 @@ #define SPLL_CNTL_MODE__SPLL_FASTEN__SHIFT 0x00000003 #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV_MASK 0x00000002L #define SPLL_CNTL_MODE__SPLL_LEGACY_PDIV__SHIFT 0x00000001 +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK 0x0C000000L +#define SPLL_CNTL_MODE__SPLL_REFCLK_SEL__SHIFT 0x0000001A #define SPLL_CNTL_MODE__SPLL_RESET_EN_MASK 0x10000000L #define SPLL_CNTL_MODE__SPLL_RESET_EN__SHIFT 0x0000001c #define SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK 0x00000001L @@ -303,10 +470,25 @@ #define SPLL_CNTL_MODE__SPLL_TEST__SHIFT 0x00000002 #define SPLL_CNTL_MODE__SPLL_VCO_MODE_MASK 0x60000000L #define SPLL_CNTL_MODE__SPLL_VCO_MODE__SHIFT 0x0000001d + #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK 0x0f000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT 0x00000018 #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX_MASK 0xf0000000L #define TARGET_AND_CURRENT_PROFILE_INDEX_1__TARG_PCIE_INDEX__SHIFT 0x0000001c + +#define TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK 0x000000F0L +#define TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT 0x00000004 + +#define THM_CLK_CNTL__CMON_CLK_SEL_MASK 0x000000FFL +#define THM_CLK_CNTL__CMON_CLK_SEL__SHIFT 0x00000000 +#define THM_CLK_CNTL__TMON_CLK_SEL_MASK 0x0000FF00L +#define THM_CLK_CNTL__TMON_CLK_SEL__SHIFT 0x00000008 + +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK 0x000000FFL +#define MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT 0x00000000 +#define MISC_CLK_CNTL__ZCLK_SEL_MASK 0x0000FF00L +#define MISC_CLK_CNTL__ZCLK_SEL__SHIFT 0x00000008 + #define THM_TMON0_DEBUG__DEBUG_RDI_MASK 0x0000001fL #define THM_TMON0_DEBUG__DEBUG_RDI__SHIFT 0x00000000 #define THM_TMON0_DEBUG__DEBUG_Z_MASK 0x0000ffe0L diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h index 14574112c469..c4aaa86a95e2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h @@ -1147,6 +1147,22 @@ #define regUVD_DPG_LMA_CTL2_BASE_IDX 1 +// addressBlock: uvd_mmsch_dec +// base address: 0x20d2c +#define regMMSCH_VF_VMID 0x054b +#define regMMSCH_VF_VMID_BASE_IDX 1 +#define regMMSCH_VF_CTX_ADDR_LO 0x054c +#define regMMSCH_VF_CTX_ADDR_LO_BASE_IDX 1 +#define regMMSCH_VF_CTX_ADDR_HI 0x054d +#define regMMSCH_VF_CTX_ADDR_HI_BASE_IDX 1 +#define regMMSCH_VF_CTX_SIZE 0x054e +#define regMMSCH_VF_CTX_SIZE_BASE_IDX 1 +#define regMMSCH_VF_MAILBOX_HOST 0x0552 +#define regMMSCH_VF_MAILBOX_HOST_BASE_IDX 1 +#define regMMSCH_VF_MAILBOX_RESP 0x0553 +#define regMMSCH_VF_MAILBOX_RESP_BASE_IDX 1 + + // addressBlock: uvd_vcn_umsch_dec // base address: 0x21500 #define regVCN_UMSCH_MES_CNTL 0x0740 diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h index 5c119a6b87fb..bd7242e4e9c6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h @@ -5929,6 +5929,29 @@ #define UVD_DPG_LMA_CTL2__JPEG_WRITE_PTR_MASK 0x0000FE00L +// addressBlock: uvd_mmsch_dec +//MMSCH_VF_VMID +#define MMSCH_VF_VMID__VF_CTX_VMID__SHIFT 0x0 +#define MMSCH_VF_VMID__VF_GPCOM_VMID__SHIFT 0x5 +#define MMSCH_VF_VMID__VF_CTX_VMID_MASK 0x0000001FL +#define MMSCH_VF_VMID__VF_GPCOM_VMID_MASK 0x000003E0L +//MMSCH_VF_CTX_ADDR_LO +#define MMSCH_VF_CTX_ADDR_LO__VF_CTX_ADDR_LO__SHIFT 0x6 +#define MMSCH_VF_CTX_ADDR_LO__VF_CTX_ADDR_LO_MASK 0xFFFFFFC0L +//MMSCH_VF_CTX_ADDR_HI +#define MMSCH_VF_CTX_ADDR_HI__VF_CTX_ADDR_HI__SHIFT 0x0 +#define MMSCH_VF_CTX_ADDR_HI__VF_CTX_ADDR_HI_MASK 0xFFFFFFFFL +//MMSCH_VF_CTX_SIZE +#define MMSCH_VF_CTX_SIZE__VF_CTX_SIZE__SHIFT 0x0 +#define MMSCH_VF_CTX_SIZE__VF_CTX_SIZE_MASK 0xFFFFFFFFL +//MMSCH_VF_MAILBOX_HOST +#define MMSCH_VF_MAILBOX_HOST__DATA__SHIFT 0x0 +#define MMSCH_VF_MAILBOX_HOST__DATA_MASK 0xFFFFFFFFL +//MMSCH_VF_MAILBOX_RESP +#define MMSCH_VF_MAILBOX_RESP__RESP__SHIFT 0x0 +#define MMSCH_VF_MAILBOX_RESP__RESP_MASK 0xFFFFFFFFL + + // addressBlock: uvd_vcn_umsch_dec //VCN_UMSCH_MES_CNTL #define VCN_UMSCH_MES_CNTL__PIPE_ID__SHIFT 0x0 diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index b78360a71bc9..52bac19fb404 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -4308,7 +4308,7 @@ typedef struct _ATOM_DPCD_INFO // note2: From RV770, the memory is more than 32bit addressable, so we will change // ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains // exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware -// (in offset to start of memory address) is KB aligned instead of byte aligend. +// (in offset to start of memory address) is KB aligned instead of byte aligned. // Note3: /* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter, diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 0160d65f3f5e..2d1135bdc4b9 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -183,6 +183,7 @@ enum atom_dgpu_vram_type { ATOM_DGPU_VRAM_TYPE_HBM2E = 0x61, ATOM_DGPU_VRAM_TYPE_GDDR6 = 0x70, ATOM_DGPU_VRAM_TYPE_HBM3 = 0x80, + ATOM_DGPU_VRAM_TYPE_HBM3E = 0x81, }; enum atom_dp_vs_preemph_def{ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 21dc956b5f35..0f7542d7074b 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -128,6 +128,7 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_CPU_CLK, AMDGPU_PP_SENSOR_VDDNB, AMDGPU_PP_SENSOR_VDDGFX, + AMDGPU_PP_SENSOR_VDDBOARD, AMDGPU_PP_SENSOR_UVD_VCLK, AMDGPU_PP_SENSOR_UVD_DCLK, AMDGPU_PP_SENSOR_VCE_ECCLK, diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index f8008270f813..3728389fc3be 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { @@ -1118,8 +1118,8 @@ struct v11_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5eabab611b02..03a35f8a65b0 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -535,8 +535,8 @@ struct v12_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v12_sdma_mqd { @@ -1118,8 +1118,8 @@ struct v12_compute_mqd { uint32_t reserved_443; // offset: 443 (0x1BB) uint32_t reserved_444; // offset: 444 (0x1BC) uint32_t reserved_445; // offset: 445 (0x1BD) - uint32_t reserved_446; // offset: 446 (0x1BE) - uint32_t reserved_447; // offset: 447 (0x1BF) + uint32_t fence_address_lo; // offset: 446 (0x1BE) + uint32_t fence_address_hi; // offset: 447 (0x1BF) uint32_t gws_0_val; // offset: 448 (0x1C0) uint32_t gws_1_val; // offset: 449 (0x1C1) uint32_t gws_2_val; // offset: 450 (0x1C2) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 3533d43ed1e7..2148c8db5a59 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -329,6 +329,34 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) return ret; } +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool support_link_reset = false; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_link_reset = smu_link_reset_is_support(smu); + mutex_unlock(&adev->pm.mutex); + } + + return support_link_reset; +} + +int amdgpu_dpm_link_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = -EOPNOTSUPP; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + ret = smu_link_reset(smu); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) @@ -780,6 +808,21 @@ int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn(smu, inst_mask); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index d533c79f7e21..edd9895b46c0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2937,6 +2937,23 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, return sysfs_emit(buf, "%d\n", vddgfx); } +static ssize_t amdgpu_hwmon_show_vddboard(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + u32 vddboard; + int r; + + /* get the voltage */ + r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&vddboard); + if (r) + return r; + + return sysfs_emit(buf, "%d\n", vddboard); +} + static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, struct device_attribute *attr, char *buf) @@ -2944,6 +2961,12 @@ static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, return sysfs_emit(buf, "vddgfx\n"); } +static ssize_t amdgpu_hwmon_show_vddboard_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "vddboard\n"); +} static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, struct device_attribute *attr, char *buf) @@ -3287,6 +3310,8 @@ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0) static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0); static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0); +static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, amdgpu_hwmon_show_vddboard, NULL, 0); +static SENSOR_DEVICE_ATTR(in2_label, S_IRUGO, amdgpu_hwmon_show_vddboard_label, NULL, 0); static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0); static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, amdgpu_hwmon_show_power_input, NULL, 0); static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0); @@ -3334,6 +3359,8 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_in0_label.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, &sensor_dev_attr_in1_label.dev_attr.attr, + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_in2_label.dev_attr.attr, &sensor_dev_attr_power1_average.dev_attr.attr, &sensor_dev_attr_power1_input.dev_attr.attr, &sensor_dev_attr_power1_cap_max.dev_attr.attr, @@ -3485,6 +3512,13 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_in1_label.dev_attr.attr)) return 0; + /* only few boards support vddboard */ + if ((attr == &sensor_dev_attr_in2_input.dev_attr.attr || + attr == &sensor_dev_attr_in2_label.dev_attr.attr) && + amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&tmp) == -EOPNOTSUPP) + return 0; + /* no mclk on APUs other than gc 9,4,3*/ if (((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(9, 4, 3))) && (attr == &sensor_dev_attr_freq2_input.dev_attr.attr || diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 4c0f7ad14816..2c3c97587dd5 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -416,11 +416,13 @@ int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); +int amdgpu_dpm_link_reset(struct amdgpu_device *adev); int amdgpu_dpm_enable_gfx_features(struct amdgpu_device *adev); int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_link_reset_supported(struct amdgpu_device *adev); int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, @@ -607,5 +609,6 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum pp_pm_policy p_type, char *buf); int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); +int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 1c25f3023e93..4c0e976004ba 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -30,16 +30,32 @@ #include "amdgpu_atombios.h" #include "amdgpu_dpm_internal.h" #include "amd_pcie.h" -#include "sid.h" -#include "r600_dpm.h" -#include "si_dpm.h" #include "atom.h" +#include "gfx_v6_0.h" +#include "r600_dpm.h" +#include "sid.h" +#include "si_dpm.h" #include "../include/pptable.h" #include #include #include #include +#include "bif/bif_3_0_d.h" +#include "bif/bif_3_0_sh_mask.h" + +#include "dce/dce_6_0_d.h" +#include "dce/dce_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + +#include"gmc/gmc_6_0_d.h" +#include"gmc/gmc_6_0_sh_mask.h" + +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + #define MC_CG_ARB_FREQ_F0 0x0a #define MC_CG_ARB_FREQ_F1 0x0b #define MC_CG_ARB_FREQ_F2 0x0c @@ -2193,7 +2209,7 @@ static u32 si_calculate_cac_wintime(struct amdgpu_device *adev) if (xclk == 0) return 0; - cac_window = RREG32(CG_CAC_CTRL) & CAC_WINDOW_MASK; + cac_window = RREG32(mmCG_CAC_CTRL) & CG_CAC_CTRL__CAC_WINDOW_MASK; cac_window_size = ((cac_window & 0xFFFF0000) >> 16) * (cac_window & 0x0000FFFF); wintime = (cac_window_size * 100) / xclk; @@ -2489,19 +2505,19 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if (adev->pm.dpm.sq_ramping_threshold == 0) return -EINVAL; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (MAX_POWER_MASK >> MAX_POWER_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER > (SQ_POWER_THROTTLE__MAX_POWER_MASK >> SQ_POWER_THROTTLE__MAX_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (MIN_POWER_MASK >> MIN_POWER_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MIN_POWER > (SQ_POWER_THROTTLE__MIN_POWER_MASK >> SQ_POWER_THROTTLE__MIN_POWER__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (MAX_POWER_DELTA_MASK >> MAX_POWER_DELTA_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA > (SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK >> SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (STI_SIZE_MASK >> STI_SIZE_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_STI_SIZE > (SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK >> SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT)) enable_sq_ramping = false; - if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (LTI_RATIO_MASK >> LTI_RATIO_SHIFT)) + if (SISLANDS_DPM2_SQ_RAMP_LTI_RATIO > (SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK >> SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT)) enable_sq_ramping = false; for (i = 0; i < state->performance_level_count; i++) { @@ -2510,14 +2526,17 @@ static int si_populate_sq_ramping_values(struct amdgpu_device *adev, if ((state->performance_levels[i].sclk >= adev->pm.dpm.sq_ramping_threshold) && enable_sq_ramping) { - sq_power_throttle |= MAX_POWER(SISLANDS_DPM2_SQ_RAMP_MAX_POWER); - sq_power_throttle |= MIN_POWER(SISLANDS_DPM2_SQ_RAMP_MIN_POWER); - sq_power_throttle2 |= MAX_POWER_DELTA(SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA); - sq_power_throttle2 |= STI_SIZE(SISLANDS_DPM2_SQ_RAMP_STI_SIZE); - sq_power_throttle2 |= LTI_RATIO(SISLANDS_DPM2_SQ_RAMP_LTI_RATIO); + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER << SQ_POWER_THROTTLE__MAX_POWER__SHIFT; + sq_power_throttle |= SISLANDS_DPM2_SQ_RAMP_MIN_POWER << SQ_POWER_THROTTLE__MIN_POWER__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_MAX_POWER_DELTA << SQ_POWER_THROTTLE2__MAX_POWER_DELTA__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_STI_SIZE << SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE__SHIFT; + sq_power_throttle2 |= SISLANDS_DPM2_SQ_RAMP_LTI_RATIO << SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO__SHIFT; } else { - sq_power_throttle |= MAX_POWER_MASK | MIN_POWER_MASK; - sq_power_throttle2 |= MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + sq_power_throttle |= SQ_POWER_THROTTLE__MAX_POWER_MASK | + SQ_POWER_THROTTLE__MIN_POWER_MASK; + sq_power_throttle2 |= SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; } smc_state->levels[i].SQPowerThrottle = cpu_to_be32(sq_power_throttle); @@ -2761,9 +2780,9 @@ static int si_initialize_smc_cac_tables(struct amdgpu_device *adev) if (!cac_tables) return -ENOMEM; - reg = RREG32(CG_CAC_CTRL) & ~CAC_WINDOW_MASK; - reg |= CAC_WINDOW(si_pi->powertune_data->cac_window); - WREG32(CG_CAC_CTRL, reg); + reg = RREG32(mmCG_CAC_CTRL) & ~CG_CAC_CTRL__CAC_WINDOW_MASK; + reg |= (si_pi->powertune_data->cac_window << CG_CAC_CTRL__CAC_WINDOW__SHIFT); + WREG32(mmCG_CAC_CTRL, reg); si_pi->dyn_powertune_data.cac_leakage = adev->pm.dpm.cac_leakage; si_pi->dyn_powertune_data.dc_pwr_value = @@ -2962,10 +2981,10 @@ static int si_init_smc_spll_table(struct amdgpu_device *adev) ret = si_calculate_sclk_params(adev, sclk, &sclk_params); if (ret) break; - p_div = (sclk_params.vCG_SPLL_FUNC_CNTL & SPLL_PDIV_A_MASK) >> SPLL_PDIV_A_SHIFT; - fb_div = (sclk_params.vCG_SPLL_FUNC_CNTL_3 & SPLL_FB_DIV_MASK) >> SPLL_FB_DIV_SHIFT; - clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CLK_S_MASK) >> CLK_S_SHIFT; - clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CLK_V_MASK) >> CLK_V_SHIFT; + p_div = (sclk_params.vCG_SPLL_FUNC_CNTL & CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK) >> CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; + fb_div = (sclk_params.vCG_SPLL_FUNC_CNTL_3 & CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK) >> CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + clk_s = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM & CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK) >> CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + clk_v = (sclk_params.vCG_SPLL_SPREAD_SPECTRUM_2 & CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK) >> CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; fb_div &= ~0x00001FFF; fb_div >>= 1; @@ -3669,10 +3688,10 @@ static bool si_is_special_1gb_platform(struct amdgpu_device *adev) WREG32(MC_SEQ_IO_DEBUG_INDEX, 0xb); width = ((RREG32(MC_SEQ_IO_DEBUG_DATA) >> 1) & 1) ? 16 : 32; - tmp = RREG32(MC_ARB_RAMCFG); - row = ((tmp & NOOFROWS_MASK) >> NOOFROWS_SHIFT) + 10; - column = ((tmp & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT) + 8; - bank = ((tmp & NOOFBANK_MASK) >> NOOFBANK_SHIFT) + 2; + tmp = RREG32(mmMC_ARB_RAMCFG); + row = ((tmp & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT) + 10; + column = ((tmp & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT) + 8; + bank = ((tmp & MC_ARB_RAMCFG__NOOFBANK_MASK) >> MC_ARB_RAMCFG__NOOFBANK__SHIFT) + 2; density = (1 << (row + column - 20 + bank)) * width; @@ -3756,11 +3775,11 @@ static void si_set_dpm_event_sources(struct amdgpu_device *adev, u32 sources) } if (want_thermal_protection) { - WREG32_P(CG_THERMAL_CTRL, DPM_EVENT_SRC(dpm_event_src), ~DPM_EVENT_SRC_MASK); + WREG32_P(mmCG_THERMAL_CTRL, dpm_event_src << CG_THERMAL_CTRL__DPM_EVENT_SRC__SHIFT, ~CG_THERMAL_CTRL__DPM_EVENT_SRC_MASK); if (pi->thermal_protection) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } else { - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } } @@ -3785,20 +3804,20 @@ static void si_enable_auto_throttle_source(struct amdgpu_device *adev, static void si_start_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, GLOBAL_PWRMGT_EN, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_stop_dpm(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__GLOBAL_PWRMGT_EN_MASK); } static void si_enable_sclk_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK, ~SCLK_PWRMGT_CNTL__SCLK_PWRMGT_OFF_MASK); } @@ -3838,7 +3857,7 @@ static int si_notify_hw_of_powersource(struct amdgpu_device *adev, bool ac_power static PPSMC_Result si_send_msg_to_smc_with_parameter(struct amdgpu_device *adev, PPSMC_Msg msg, u32 parameter) { - WREG32(SMC_SCRATCH0, parameter); + WREG32(mmSMC_SCRATCH0, parameter); return amdgpu_si_send_msg_to_smc(adev, msg); } @@ -4023,12 +4042,12 @@ static void si_read_clock_registers(struct amdgpu_device *adev) { struct si_power_info *si_pi = si_get_pi(adev); - si_pi->clock_registers.cg_spll_func_cntl = RREG32(CG_SPLL_FUNC_CNTL); - si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(CG_SPLL_FUNC_CNTL_2); - si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(CG_SPLL_FUNC_CNTL_3); - si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(CG_SPLL_FUNC_CNTL_4); - si_pi->clock_registers.cg_spll_spread_spectrum = RREG32(CG_SPLL_SPREAD_SPECTRUM); - si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(CG_SPLL_SPREAD_SPECTRUM_2); + si_pi->clock_registers.cg_spll_func_cntl = RREG32(mmCG_SPLL_FUNC_CNTL); + si_pi->clock_registers.cg_spll_func_cntl_2 = RREG32(mmCG_SPLL_FUNC_CNTL_2); + si_pi->clock_registers.cg_spll_func_cntl_3 = RREG32(mmCG_SPLL_FUNC_CNTL_3); + si_pi->clock_registers.cg_spll_func_cntl_4 = RREG32(mmCG_SPLL_FUNC_CNTL_4); + si_pi->clock_registers.cg_spll_spread_spectrum = RREG32(mmCG_SPLL_SPREAD_SPECTRUM); + si_pi->clock_registers.cg_spll_spread_spectrum_2 = RREG32(mmCG_SPLL_SPREAD_SPECTRUM_2); si_pi->clock_registers.dll_cntl = RREG32(DLL_CNTL); si_pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL); si_pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL); @@ -4044,14 +4063,14 @@ static void si_enable_thermal_protection(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, 0, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); else - WREG32_P(GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, ~THERMAL_PROTECTION_DIS); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK, ~GENERAL_PWRMGT__THERMAL_PROTECTION_DIS_MASK); } static void si_enable_acpi_power_management(struct amdgpu_device *adev) { - WREG32_P(GENERAL_PWRMGT, STATIC_PM_EN, ~STATIC_PM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__STATIC_PM_EN_MASK, ~GENERAL_PWRMGT__STATIC_PM_EN_MASK); } #if 0 @@ -4132,9 +4151,9 @@ static void si_program_ds_registers(struct amdgpu_device *adev) tmp = 0x1; if (eg_pi->sclk_deep_sleep) { - WREG32_P(MISC_CLK_CNTL, DEEP_SLEEP_CLK_SEL(tmp), ~DEEP_SLEEP_CLK_SEL_MASK); - WREG32_P(CG_SPLL_AUTOSCALE_CNTL, AUTOSCALE_ON_SS_CLEAR, - ~AUTOSCALE_ON_SS_CLEAR); + WREG32_P(mmMISC_CLK_CNTL, (tmp << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT), ~MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK); + WREG32_P(mmCG_SPLL_AUTOSCALE_CNTL, CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK, + ~CG_SPLL_AUTOSCALE_CNTL__AUTOSCALE_ON_SS_CLEAR_MASK); } } @@ -4143,18 +4162,18 @@ static void si_program_display_gap(struct amdgpu_device *adev) u32 tmp, pipe; int i; - tmp = RREG32(CG_DISPLAY_GAP_CNTL) & ~(DISP1_GAP_MASK | DISP2_GAP_MASK); + tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); if (adev->pm.dpm.new_active_crtc_count > 0) - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; else - tmp |= DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; if (adev->pm.dpm.new_active_crtc_count > 1) - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM); + tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; else - tmp |= DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE); + tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG); pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT; @@ -4189,10 +4208,10 @@ static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) if (enable) { if (pi->sclk_ss) - WREG32_P(GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } else { - WREG32_P(CG_SPLL_SPREAD_SPECTRUM, 0, ~SSEN); - WREG32_P(GENERAL_PWRMGT, 0, ~DYN_SPREAD_SPECTRUM_EN); + WREG32_P(mmCG_SPLL_SPREAD_SPECTRUM, 0, ~CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__DYN_SPREAD_SPECTRUM_EN_MASK); } } @@ -4214,15 +4233,15 @@ static void si_setup_bsp(struct amdgpu_device *adev) &pi->pbsu); - pi->dsp = BSP(pi->bsp) | BSU(pi->bsu); - pi->psp = BSP(pi->pbsp) | BSU(pi->pbsu); + pi->dsp = (pi->bsp << CG_BSP__BSP__SHIFT) | (pi->bsu << CG_BSP__BSU__SHIFT); + pi->psp = (pi->pbsp << CG_BSP__BSP__SHIFT) | (pi->pbsu << CG_BSP__BSU__SHIFT); - WREG32(CG_BSP, pi->dsp); + WREG32(mmCG_BSP, pi->dsp); } static void si_program_git(struct amdgpu_device *adev) { - WREG32_P(CG_GIT, CG_GICST(R600_GICST_DFLT), ~CG_GICST_MASK); + WREG32_P(mmCG_GIT, R600_GICST_DFLT << CG_GIT__CG_GICST__SHIFT, ~CG_GIT__CG_GICST_MASK); } static void si_program_tp(struct amdgpu_device *adev) @@ -4231,54 +4250,54 @@ static void si_program_tp(struct amdgpu_device *adev) enum r600_td td = R600_TD_DFLT; for (i = 0; i < R600_PM_NUMBER_OF_TC; i++) - WREG32(CG_FFCT_0 + i, (UTC_0(r600_utc[i]) | DTC_0(r600_dtc[i]))); + WREG32(mmCG_FFCT_0 + i, (r600_utc[i] << CG_FFCT_0__UTC_0__SHIFT | r600_dtc[i] << CG_FFCT_0__DTC_0__SHIFT)); if (td == R600_TD_AUTO) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); else - WREG32_P(SCLK_PWRMGT_CNTL, FIR_FORCE_TREND_SEL, ~FIR_FORCE_TREND_SEL); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK, ~SCLK_PWRMGT_CNTL__FIR_FORCE_TREND_SEL_MASK); if (td == R600_TD_UP) - WREG32_P(SCLK_PWRMGT_CNTL, 0, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); if (td == R600_TD_DOWN) - WREG32_P(SCLK_PWRMGT_CNTL, FIR_TREND_MODE, ~FIR_TREND_MODE); + WREG32_P(mmSCLK_PWRMGT_CNTL, SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK, ~SCLK_PWRMGT_CNTL__FIR_TREND_MODE_MASK); } static void si_program_tpp(struct amdgpu_device *adev) { - WREG32(CG_TPC, R600_TPC_DFLT); + WREG32(mmCG_TPC, R600_TPC_DFLT); } static void si_program_sstp(struct amdgpu_device *adev) { - WREG32(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT))); + WREG32(mmCG_SSP, (R600_SSTU_DFLT << CG_SSP__SSTU__SHIFT| R600_SST_DFLT << CG_SSP__SST__SHIFT)); } static void si_enable_display_gap(struct amdgpu_device *adev) { - u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL); + u32 tmp = RREG32(mmCG_DISPLAY_GAP_CNTL); - tmp &= ~(DISP1_GAP_MASK | DISP2_GAP_MASK); - tmp |= (DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE) | - DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE)); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); + tmp |= (R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT); - tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK); - tmp |= (DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK) | - DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE)); - WREG32(CG_DISPLAY_GAP_CNTL, tmp); + tmp &= ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG_MASK); + tmp |= (R600_PM_DISPLAY_GAP_VBLANK << CG_DISPLAY_GAP_CNTL__DISP1_GAP_MCHG__SHIFT | + R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP_MCHG__SHIFT); + WREG32(mmCG_DISPLAY_GAP_CNTL, tmp); } static void si_program_vc(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); - WREG32(CG_FTV, pi->vrc); + WREG32(mmCG_FTV, pi->vrc); } static void si_clear_vc(struct amdgpu_device *adev) { - WREG32(CG_FTV, 0); + WREG32(mmCG_FTV, 0); } static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock) @@ -4735,7 +4754,7 @@ static u32 si_calculate_memory_refresh_rate(struct amdgpu_device *adev, u32 dram_rows; u32 dram_refresh_rate; u32 mc_arb_rfsh_rate; - u32 tmp = (RREG32(MC_ARB_RAMCFG) & NOOFROWS_MASK) >> NOOFROWS_SHIFT; + u32 tmp = (RREG32(mmMC_ARB_RAMCFG) & MC_ARB_RAMCFG__NOOFROWS_MASK) >> MC_ARB_RAMCFG__NOOFROWS__SHIFT; if (tmp >= 4) dram_rows = 16384; @@ -4909,7 +4928,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd); - reg = CG_R(0xffff) | CG_L(0); + reg = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; table->initialState.level.aT = cpu_to_be32(reg); table->initialState.level.bSP = cpu_to_be32(pi->dsp); table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen; @@ -4935,10 +4954,13 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev, table->initialState.level.dpm2.BelowSafeInc = 0; table->initialState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | + SQ_POWER_THROTTLE__MAX_POWER_MASK; table->initialState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | + SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | + SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5057,8 +5079,8 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS); - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(4); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 4 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl); @@ -5102,10 +5124,10 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev, table->ACPIState.level.dpm2.BelowSafeInc = 0; table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0; - reg = MIN_POWER_MASK | MAX_POWER_MASK; + reg = SQ_POWER_THROTTLE__MIN_POWER_MASK | SQ_POWER_THROTTLE__MAX_POWER_MASK; table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg); - reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK; + reg = SQ_POWER_THROTTLE2__MAX_POWER_DELTA_MASK | SQ_POWER_THROTTLE2__SHORT_TERM_INTERVAL_SIZE_MASK | SQ_POWER_THROTTLE2__LONG_TERM_INTERVAL_RATIO_MASK; table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg); return 0; @@ -5250,8 +5272,8 @@ static int si_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - WREG32(CG_ULV_CONTROL, ulv->cg_ulv_control); - WREG32(CG_ULV_PARAMETER, ulv->cg_ulv_parameter); + WREG32(mmCG_ULV_CONTROL, ulv->cg_ulv_control); + WREG32(mmCG_ULV_PARAMETER, ulv->cg_ulv_parameter); lane_width = amdgpu_get_pcie_lanes(adev); si_write_smc_soft_register(adev, SI_SMC_SOFT_REGISTER_non_ulv_pcie_link_width, lane_width); @@ -5294,16 +5316,16 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, do_div(tmp, reference_clock); fbdiv = (u32) tmp; - spll_func_cntl &= ~(SPLL_PDIV_A_MASK | SPLL_REF_DIV_MASK); - spll_func_cntl |= SPLL_REF_DIV(dividers.ref_div); - spll_func_cntl |= SPLL_PDIV_A(dividers.post_div); + spll_func_cntl &= ~(CG_SPLL_FUNC_CNTL__SPLL_PDIV_A_MASK | CG_SPLL_FUNC_CNTL__SPLL_REF_DIV_MASK); + spll_func_cntl |= dividers.ref_div << CG_SPLL_FUNC_CNTL__SPLL_REF_DIV__SHIFT; + spll_func_cntl |= dividers.post_div << CG_SPLL_FUNC_CNTL__SPLL_PDIV_A__SHIFT; - spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK; - spll_func_cntl_2 |= SCLK_MUX_SEL(2); + spll_func_cntl_2 &= ~CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL_MASK; + spll_func_cntl_2 |= 2 << CG_SPLL_FUNC_CNTL_2__SCLK_MUX_SEL__SHIFT; - spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK; - spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv); - spll_func_cntl_3 |= SPLL_DITHEN; + spll_func_cntl_3 &= ~CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV_MASK; + spll_func_cntl_3 |= fbdiv << CG_SPLL_FUNC_CNTL_3__SPLL_FB_DIV__SHIFT; + spll_func_cntl_3 |= CG_SPLL_FUNC_CNTL_3__SPLL_DITHEN_MASK; if (pi->sclk_ss) { struct amdgpu_atom_ss ss; @@ -5314,12 +5336,12 @@ static int si_calculate_sclk_params(struct amdgpu_device *adev, u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate); u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000); - cg_spll_spread_spectrum &= ~CLK_S_MASK; - cg_spll_spread_spectrum |= CLK_S(clk_s); - cg_spll_spread_spectrum |= SSEN; + cg_spll_spread_spectrum &= ~CG_SPLL_SPREAD_SPECTRUM__CLK_S_MASK; + cg_spll_spread_spectrum |= clk_s << CG_SPLL_SPREAD_SPECTRUM__CLK_S__SHIFT; + cg_spll_spread_spectrum |= CG_SPLL_SPREAD_SPECTRUM__SSEN_MASK; - cg_spll_spread_spectrum_2 &= ~CLK_V_MASK; - cg_spll_spread_spectrum_2 |= CLK_V(clk_v); + cg_spll_spread_spectrum_2 &= ~CG_SPLL_SPREAD_SPECTRUM_2__CLK_V_MASK; + cg_spll_spread_spectrum_2 |= clk_v << CG_SPLL_SPREAD_SPECTRUM_2__CLK_V__SHIFT; } } @@ -5485,7 +5507,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, if (pi->mclk_stutter_mode_threshold && (pl->mclk <= pi->mclk_stutter_mode_threshold) && !eg_pi->uvd_enabled && - (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && + (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && (adev->pm.dpm.new_active_crtc_count <= 2)) { level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN; } @@ -5579,7 +5601,7 @@ static int si_populate_smc_t(struct amdgpu_device *adev, return -EINVAL; if (state->performance_level_count < 2) { - a_t = CG_R(0xffff) | CG_L(0); + a_t = 0xffff << CG_AT__CG_R__SHIFT | 0 << CG_AT__CG_L__SHIFT; smc_state->levels[0].aT = cpu_to_be32(a_t); return 0; } @@ -5600,13 +5622,13 @@ static int si_populate_smc_t(struct amdgpu_device *adev, t_l = (i + 1) * 1000 + 50 * R600_AH_DFLT; } - a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_R_MASK; - a_t |= CG_R(t_l * pi->bsp / 20000); + a_t = be32_to_cpu(smc_state->levels[i].aT) & ~CG_AT__CG_R_MASK; + a_t |= (t_l * pi->bsp / 20000) << CG_AT__CG_R__SHIFT; smc_state->levels[i].aT = cpu_to_be32(a_t); high_bsp = (i == state->performance_level_count - 2) ? pi->pbsp : pi->bsp; - a_t = CG_R(0xffff) | CG_L(t_h * high_bsp / 20000); + a_t = (0xffff) << CG_AT__CG_R__SHIFT | (t_h * high_bsp / 20000) << CG_AT__CG_L__SHIFT; smc_state->levels[i + 1].aT = cpu_to_be32(a_t); } @@ -6180,9 +6202,9 @@ static int si_upload_mc_reg_table(struct amdgpu_device *adev, static void si_enable_voltage_control(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); else - WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN); + WREG32_P(mmGENERAL_PWRMGT, 0, ~GENERAL_PWRMGT__VOLT_PWRMGT_EN_MASK); } static enum si_pcie_gen si_get_maximum_link_speed(struct amdgpu_device *adev, @@ -6204,8 +6226,8 @@ static u16 si_get_current_pcie_speed(struct amdgpu_device *adev) { u32 speed_cntl; - speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK; - speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT; + speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL) & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK; + speed_cntl >>= PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; return (u16)speed_cntl; } @@ -6412,21 +6434,21 @@ static void si_dpm_setup_asic(struct amdgpu_device *adev) static int si_thermal_enable_alert(struct amdgpu_device *adev, bool enable) { - u32 thermal_int = RREG32(CG_THERMAL_INT); + u32 thermal_int = RREG32(mmCG_THERMAL_INT); if (enable) { PPSMC_Result result; - thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW); - WREG32(CG_THERMAL_INT, thermal_int); + thermal_int &= ~(CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK); + WREG32(mmCG_THERMAL_INT, thermal_int); result = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt); if (result != PPSMC_Result_OK) { DRM_DEBUG_KMS("Could not enable thermal interrupts.\n"); return -EINVAL; } } else { - thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW; - WREG32(CG_THERMAL_INT, thermal_int); + thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK | CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32(mmCG_THERMAL_INT, thermal_int); } return 0; @@ -6447,9 +6469,9 @@ static int si_thermal_set_temperature_range(struct amdgpu_device *adev, return -EINVAL; } - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTH(high_temp / 1000), ~DIG_THERM_INTH_MASK); - WREG32_P(CG_THERMAL_INT, DIG_THERM_INTL(low_temp / 1000), ~DIG_THERM_INTL_MASK); - WREG32_P(CG_THERMAL_CTRL, DIG_THERM_DPM(high_temp / 1000), ~DIG_THERM_DPM_MASK); + WREG32_P(mmCG_THERMAL_INT, (high_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTH__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTH_MASK); + WREG32_P(mmCG_THERMAL_INT, (low_temp / 1000) << CG_THERMAL_INT__DIG_THERM_INTL__SHIFT, ~CG_THERMAL_INT__DIG_THERM_INTL_MASK); + WREG32_P(mmCG_THERMAL_CTRL, (high_temp / 1000) << CG_THERMAL_CTRL__DIG_THERM_DPM__SHIFT, ~CG_THERMAL_CTRL__DIG_THERM_DPM_MASK); adev->pm.dpm.thermal.min_temp = low_temp; adev->pm.dpm.thermal.max_temp = high_temp; @@ -6463,20 +6485,20 @@ static void si_fan_ctrl_set_static_mode(struct amdgpu_device *adev, u32 mode) u32 tmp; if (si_pi->fan_ctrl_is_in_default_mode) { - tmp = (RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK) >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; si_pi->fan_ctrl_default_mode = tmp; - tmp = (RREG32(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT; + tmp = (RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__TMIN_MASK) >> CG_FDO_CTRL2__TMIN__SHIFT; si_pi->t_min = tmp; si_pi->fan_ctrl_is_in_default_mode = false; } - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(0); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= 0 << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_setup_fan_table(struct amdgpu_device *adev) @@ -6495,7 +6517,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) return 0; } - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) { adev->pm.dpm.fan.ucode_fan_control = false; @@ -6531,7 +6553,7 @@ static int si_thermal_setup_fan_table(struct amdgpu_device *adev) reference_clock) / 1600); fan_table.fdo_max = cpu_to_be16((u16)duty100); - tmp = (RREG32(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT; + tmp = (RREG32(mmCG_MULT_THERMAL_CTRL) & CG_MULT_THERMAL_CTRL__TEMP_SEL_MASK) >> CG_MULT_THERMAL_CTRL__TEMP_SEL__SHIFT; fan_table.temp_src = (uint8_t)tmp; ret = amdgpu_si_copy_bytes_to_smc(adev, @@ -6590,8 +6612,8 @@ static int si_dpm_get_fan_speed_pwm(void *handle, if (adev->pm.no_fan) return -ENOENT; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; - duty = (RREG32(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; + duty = (RREG32(mmCG_THERMAL_STATUS) & CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK) >> CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6621,7 +6643,7 @@ static int si_dpm_set_fan_speed_pwm(void *handle, if (speed > 255) return -EINVAL; - duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; + duty100 = (RREG32(mmCG_FDO_CTRL1) & CG_FDO_CTRL1__FMAX_DUTY100_MASK) >> CG_FDO_CTRL1__FMAX_DUTY100__SHIFT; if (duty100 == 0) return -EINVAL; @@ -6630,9 +6652,9 @@ static int si_dpm_set_fan_speed_pwm(void *handle, do_div(tmp64, 255); duty = (u32)tmp64; - tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK; - tmp |= FDO_STATIC_DUTY(duty); - WREG32(CG_FDO_CTRL0, tmp); + tmp = RREG32(mmCG_FDO_CTRL0) & ~CG_FDO_CTRL0__FDO_STATIC_DUTY_MASK; + tmp |= duty << CG_FDO_CTRL0__FDO_STATIC_DUTY__SHIFT; + WREG32(mmCG_FDO_CTRL0, tmp); return 0; } @@ -6672,8 +6694,8 @@ static int si_dpm_get_fan_control_mode(void *handle, u32 *fan_mode) if (si_pi->fan_is_controlled_by_smc) return 0; - tmp = RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK; - *fan_mode = (tmp >> FDO_PWM_MODE_SHIFT); + tmp = RREG32(mmCG_FDO_CTRL2) & CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + *fan_mode = (tmp >> CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT); return 0; } @@ -6691,7 +6713,7 @@ static int si_fan_ctrl_get_fan_speed_rpm(struct amdgpu_device *adev, if (adev->pm.fan_pulses_per_revolution == 0) return -ENOENT; - tach_period = (RREG32(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT; + tach_period = (RREG32(mmCG_TACH_STATUS) & CG_TACH_STATUS__TACH_PERIOD_MASK) >> CG_TACH_STATUS__TACH_PERIOD__SHIFT; if (tach_period == 0) return -ENOENT; @@ -6720,9 +6742,9 @@ static int si_fan_ctrl_set_fan_speed_rpm(struct amdgpu_device *adev, si_fan_ctrl_stop_smc_fan_control(adev); tach_period = 60 * xclk * 10000 / (8 * speed); - tmp = RREG32(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK; - tmp |= TARGET_PERIOD(tach_period); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__TARGET_PERIOD_MASK; + tmp |= tach_period << CG_TACH_CTRL__TARGET_PERIOD__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); si_fan_ctrl_set_static_mode(adev, FDO_PWM_MODE_STATIC_RPM); @@ -6736,13 +6758,13 @@ static void si_fan_ctrl_set_default_mode(struct amdgpu_device *adev) u32 tmp; if (!si_pi->fan_ctrl_is_in_default_mode) { - tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK; - tmp |= FDO_PWM_MODE(si_pi->fan_ctrl_default_mode); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__FDO_PWM_MODE_MASK; + tmp |= si_pi->fan_ctrl_default_mode << CG_FDO_CTRL2__FDO_PWM_MODE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); - tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK; - tmp |= TMIN(si_pi->t_min); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TMIN_MASK; + tmp |= si_pi->t_min << CG_FDO_CTRL2__TMIN__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); si_pi->fan_ctrl_is_in_default_mode = true; } } @@ -6760,14 +6782,14 @@ static void si_thermal_initialize(struct amdgpu_device *adev) u32 tmp; if (adev->pm.fan_pulses_per_revolution) { - tmp = RREG32(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK; - tmp |= EDGE_PER_REV(adev->pm.fan_pulses_per_revolution -1); - WREG32(CG_TACH_CTRL, tmp); + tmp = RREG32(mmCG_TACH_CTRL) & ~CG_TACH_CTRL__EDGE_PER_REV_MASK; + tmp |= (adev->pm.fan_pulses_per_revolution -1) << CG_TACH_CTRL__EDGE_PER_REV__SHIFT; + WREG32(mmCG_TACH_CTRL, tmp); } - tmp = RREG32(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK; - tmp |= TACH_PWM_RESP_RATE(0x28); - WREG32(CG_FDO_CTRL2, tmp); + tmp = RREG32(mmCG_FDO_CTRL2) & ~CG_FDO_CTRL2__TACH_PWM_RESP_RATE_MASK; + tmp |= 0x28 << CG_FDO_CTRL2__TACH_PWM_RESP_RATE__SHIFT; + WREG32(mmCG_FDO_CTRL2, tmp); } static int si_thermal_start_thermal_controller(struct amdgpu_device *adev) @@ -7530,8 +7552,8 @@ static void si_dpm_debugfs_print_current_performance_level(void *handle, struct si_ps *ps = si_get_ps(rps); struct rv7xx_pl *pl; u32 current_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; if (current_index >= ps->performance_level_count) { seq_printf(m, "invalid dpm profile %d\n", current_index); @@ -7554,14 +7576,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_LOW_TO_HIGH: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_HIGH; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_HIGH_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7571,14 +7593,14 @@ static int si_dpm_set_interrupt_state(struct amdgpu_device *adev, case AMDGPU_THERMAL_IRQ_HIGH_TO_LOW: switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int |= THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int |= CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; case AMDGPU_IRQ_STATE_ENABLE: - cg_thermal_int = RREG32_SMC(CG_THERMAL_INT); - cg_thermal_int &= ~THERM_INT_MASK_LOW; - WREG32_SMC(CG_THERMAL_INT, cg_thermal_int); + cg_thermal_int = RREG32_SMC(mmCG_THERMAL_INT); + cg_thermal_int &= ~CG_THERMAL_INT__THERM_INT_MASK_LOW_MASK; + WREG32_SMC(mmCG_THERMAL_INT, cg_thermal_int); break; default: break; @@ -7883,8 +7905,8 @@ static int si_dpm_get_temp(void *handle) int actual_temp = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >> - CTF_TEMP_SHIFT; + temp = (RREG32(mmCG_MULT_THERMAL_STATUS) & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; if (temp & 0x200) actual_temp = 255; @@ -8014,8 +8036,8 @@ static int si_dpm_read_sensor(void *handle, int idx, struct si_ps *ps = si_get_ps(rps); uint32_t sclk, mclk; u32 pl_index = - (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> - CURRENT_STATE_INDEX_SHIFT; + (RREG32(mmTARGET_AND_CURRENT_PROFILE_INDEX) & TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURRENT_STATE_INDEX__SHIFT; /* size must be at least 4 bytes for all sensors */ if (*size < 4) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 8f994ffa9cd1..4e65ab9e931c 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -30,6 +30,12 @@ #include "amdgpu_ucode.h" #include "sislands_smc.h" +#include "smu/smu_6_0_d.h" +#include "smu/smu_6_0_sh_mask.h" + +#include "gca/gfx_6_0_d.h" +#include "gca/gfx_6_0_sh_mask.h" + static int si_set_smc_sram_address(struct amdgpu_device *adev, u32 smc_address, u32 limit) { @@ -38,8 +44,8 @@ static int si_set_smc_sram_address(struct amdgpu_device *adev, if ((smc_address + 3) > limit) return -EINVAL; - WREG32(SMC_IND_INDEX_0, smc_address); - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, smc_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); return 0; } @@ -68,7 +74,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; byte_count -= 4; @@ -83,7 +89,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - original_data = RREG32(SMC_IND_DATA_0); + original_data = RREG32(mmSMC_IND_DATA_0); extra_shift = 8 * (4 - byte_count); while (byte_count > 0) { @@ -99,7 +105,7 @@ int amdgpu_si_copy_bytes_to_smc(struct amdgpu_device *adev, if (ret) goto done; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); } done: @@ -121,10 +127,10 @@ void amdgpu_si_reset_smc(struct amdgpu_device *adev) { u32 tmp; - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); - RREG32(CB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); + RREG32(mmCB_CGTT_SCLK_CTRL); tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL) | RST_REG; @@ -170,16 +176,16 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; - WREG32(SMC_MESSAGE_0, msg); + WREG32(mmSMC_MESSAGE_0, msg); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(SMC_RESP_0); + return (PPSMC_Result)RREG32(mmSMC_RESP_0); } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) @@ -225,18 +231,18 @@ int amdgpu_si_load_smc_ucode(struct amdgpu_device *adev, u32 limit) return -EINVAL; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(SMC_IND_INDEX_0, ucode_start_address); - WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0); + WREG32(mmSMC_IND_INDEX_0, ucode_start_address); + WREG32_P(mmSMC_IND_ACCESS_CNTL, SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); while (ucode_size >= 4) { /* SMC address space is BE */ data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3]; - WREG32(SMC_IND_DATA_0, data); + WREG32(mmSMC_IND_DATA_0, data); src += 4; ucode_size -= 4; } - WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0); + WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return 0; @@ -251,7 +257,7 @@ int amdgpu_si_read_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - *value = RREG32(SMC_IND_DATA_0); + *value = RREG32(mmSMC_IND_DATA_0); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; @@ -266,7 +272,7 @@ int amdgpu_si_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, spin_lock_irqsave(&adev->smc_idx_lock, flags); ret = si_set_smc_sram_address(adev, smc_address, limit); if (ret == 0) - WREG32(SMC_IND_DATA_0, value); + WREG32(mmSMC_IND_DATA_0, value); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); return ret; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 4bd92fd782be..8d40ed0f0e83 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -143,6 +143,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -180,6 +184,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index 5a010cd38303..baf51cd82a35 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -46,42 +46,6 @@ static int smu7_set_smc_sram_address(struct pp_hwmgr *hwmgr, uint32_t smc_addr, } -int smu7_copy_bytes_from_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, uint32_t *dest, uint32_t byte_count, uint32_t limit) -{ - uint32_t data; - uint32_t addr; - uint8_t *dest_byte; - uint8_t i, data_byte[4] = {0}; - uint32_t *pdata = (uint32_t *)&data_byte; - - PP_ASSERT_WITH_CODE((0 == (3 & smc_start_address)), "SMC address must be 4 byte aligned.", return -EINVAL); - PP_ASSERT_WITH_CODE((limit > (smc_start_address + byte_count)), "SMC address is beyond the SMC RAM area.", return -EINVAL); - - addr = smc_start_address; - - while (byte_count >= 4) { - smu7_read_smc_sram_dword(hwmgr, addr, &data, limit); - - *dest = PP_SMC_TO_HOST_UL(data); - - dest += 1; - byte_count -= 4; - addr += 4; - } - - if (byte_count) { - smu7_read_smc_sram_dword(hwmgr, addr, &data, limit); - *pdata = PP_SMC_TO_HOST_UL(data); - /* Cast dest into byte type in dest_byte. This way, we don't overflow if the allocated memory is not 4-byte aligned. */ - dest_byte = (uint8_t *)dest; - for (i = 0; i < byte_count; i++) - dest_byte[i] = data_byte[i]; - } - - return 0; -} - - int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, const uint8_t *src, uint32_t byte_count, uint32_t limit) { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h index e7303dc8c260..63e428ceaee4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h @@ -53,8 +53,6 @@ struct smu7_smumgr { }; -int smu7_copy_bytes_from_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, - uint32_t *dest, uint32_t byte_count, uint32_t limit); int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, const uint8_t *src, uint32_t byte_count, uint32_t limit); int smu7_program_jump_on_start(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 46cce1d2aaf3..f24a1d8c77db 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -3432,15 +3432,15 @@ bool smu_mode1_reset_is_support(struct smu_context *smu) return ret; } -bool smu_mode2_reset_is_support(struct smu_context *smu) +bool smu_link_reset_is_support(struct smu_context *smu) { bool ret = false; if (!smu->pm_enabled) return false; - if (smu->ppt_funcs && smu->ppt_funcs->mode2_reset_is_support) - ret = smu->ppt_funcs->mode2_reset_is_support(smu); + if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support) + ret = smu->ppt_funcs->link_reset_is_support(smu); return ret; } @@ -3475,6 +3475,19 @@ static int smu_mode2_reset(void *handle) return ret; } +int smu_link_reset(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->pm_enabled) + return -EOPNOTSUPP; + + if (smu->ppt_funcs->link_reset) + ret = smu->ppt_funcs->link_reset(smu); + + return ret; +} + static int smu_enable_gfx_features(void *handle) { struct smu_context *smu = handle; @@ -3975,3 +3988,11 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } + +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) + smu->ppt_funcs->dpm_reset_vcn(smu, inst_mask); + + return 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index dd6d0e7aa242..d47e32ae4671 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -438,9 +438,11 @@ struct mclock_latency_table { }; enum smu_reset_mode { - SMU_RESET_MODE_0, - SMU_RESET_MODE_1, - SMU_RESET_MODE_2, + SMU_RESET_MODE_0, + SMU_RESET_MODE_1, + SMU_RESET_MODE_2, + SMU_RESET_MODE_3, + SMU_RESET_MODE_4, }; enum smu_baco_state { @@ -1229,10 +1231,11 @@ struct pptable_funcs { * @mode1_reset_is_support: Check if GPU supports mode1 reset. */ bool (*mode1_reset_is_support)(struct smu_context *smu); + /** - * @mode2_reset_is_support: Check if GPU supports mode2 reset. + * @link_reset_is_support: Check if GPU supports link reset. */ - bool (*mode2_reset_is_support)(struct smu_context *smu); + bool (*link_reset_is_support)(struct smu_context *smu); /** * @mode1_reset: Perform mode1 reset. @@ -1251,6 +1254,13 @@ struct pptable_funcs { /* for gfx feature enablement after mode2 reset */ int (*enable_gfx_features)(struct smu_context *smu); + /** + * @link_reset: Perform link reset. + * + * The gfx device driver reset + */ + int (*link_reset)(struct smu_context *smu); + /** * @get_dpm_ultimate_freq: Get the hard frequency range of a clock * domain in MHz. @@ -1382,6 +1392,11 @@ struct pptable_funcs { */ bool (*reset_sdma_is_supported)(struct smu_context *smu); + /** + * @reset_vcn: message SMU to soft reset vcn instance. + */ + int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask); + /** * @get_ecc_table: message SMU to get ECC INFO table. */ @@ -1601,8 +1616,9 @@ int smu_get_power_limit(void *handle, enum pp_power_type pp_power_type); bool smu_mode1_reset_is_support(struct smu_context *smu); -bool smu_mode2_reset_is_support(struct smu_context *smu); +bool smu_link_reset_is_support(struct smu_context *smu); int smu_mode1_reset(struct smu_context *smu); +int smu_link_reset(struct smu_context *smu); extern const struct amd_ip_funcs smu_ip_funcs; @@ -1643,6 +1659,7 @@ int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); +int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index d26f35119a12..3d9e5e967c94 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -459,4 +459,11 @@ typedef struct __attribute__((packed, aligned(4))) { uint64_t AccGfxclkBelowHostLimit; } VfMetricsTable_t; +#pragma pack(push, 4) +typedef struct { + // Telemetry + uint32_t InputTelemetryVoltageInmV; +} StaticMetricsTable_t; +#pragma pack(pop) + #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 288b2576432b..41f268313613 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,7 +94,9 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_Message_Count 0x4E +#define PPSMC_MSG_ResetVCN 0x4E +#define PPSMC_MSG_GetStaticMetricsTable 0x59 +#define PPSMC_Message_Count 0x5A //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index c9dee09395e3..eefdaa0b5df6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -277,6 +277,7 @@ __SMU_DUMMY_MAP(MALLPowerController), \ __SMU_DUMMY_MAP(MALLPowerState), \ __SMU_DUMMY_MAP(ResetSDMA), \ + __SMU_DUMMY_MAP(ResetVCN), \ __SMU_DUMMY_MAP(GetStaticMetricsTable), #undef __SMU_DUMMY_MAP diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h index ed8304d82831..56ae555bb52a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h @@ -281,11 +281,6 @@ int smu_v11_0_set_single_dpm_table(struct smu_context *smu, enum smu_clk_type clk_type, struct smu_11_0_dpm_table *single_dpm_table); -int smu_v11_0_get_dpm_level_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *min_value, - uint32_t *max_value); - int smu_v11_0_get_current_pcie_link_width_level(struct smu_context *smu); uint16_t smu_v11_0_get_current_pcie_link_width(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index cd03caffe317..4263798d716b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -112,6 +112,7 @@ struct smu_13_0_dpm_context { uint32_t workload_policy_mask; uint32_t dcef_min_ds_clk; uint64_t caps; + uint32_t board_volt; }; enum smu_13_0_power_state { @@ -162,8 +163,6 @@ int smu_v13_0_notify_memory_pool_location(struct smu_context *smu); int smu_v13_0_system_features_control(struct smu_context *smu, bool en); -int smu_v13_0_init_display_count(struct smu_context *smu, uint32_t count); - int smu_v13_0_set_allowed_mask(struct smu_context *smu); int smu_v13_0_notify_display_change(struct smu_context *smu); @@ -183,13 +182,6 @@ int smu_v13_0_disable_thermal_alert(struct smu_context *smu); int smu_v13_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value); -int smu_v13_0_set_min_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk); - -int -smu_v13_0_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request - *clock_req); - uint32_t smu_v13_0_get_fan_control_mode(struct smu_context *smu); @@ -226,11 +218,6 @@ int smu_v13_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type c int smu_v13_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max, bool automatic); -int smu_v13_0_set_hard_freq_limited_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t min, - uint32_t max); - int smu_v13_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level); @@ -310,14 +297,6 @@ int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, uint32_t *value); void smu_v13_0_interrupt_work(struct smu_context *smu); -bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); -int smu_v13_0_12_get_max_metrics_size(void); -int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); -int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value); -ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table); -extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; -extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; +void smu_v13_0_reset_custom_level(struct smu_context *smu); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 453952cdc353..9ad46f545d15 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1347,7 +1347,7 @@ static int arcturus_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (max_power_limit) *max_power_limit = power_limit; - /** + /* * No lower bound is imposed on the limit. Any unreasonable limit set * will result in frequent throttling. */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 19a25fdc2f5b..115e3fa456bc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -3089,11 +3089,6 @@ static int sienna_cichlid_stb_get_data_direct(struct smu_context *smu, return 0; } -static bool sienna_cichlid_is_mode2_reset_supported(struct smu_context *smu) -{ - return true; -} - static int sienna_cichlid_mode2_reset(struct smu_context *smu) { int ret = 0, index; @@ -3229,7 +3224,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_default_config_table_settings = sienna_cichlid_get_default_config_table_settings, .set_config_table = sienna_cichlid_set_config_table, .get_unique_id = sienna_cichlid_get_unique_id, - .mode2_reset_is_support = sienna_cichlid_is_mode2_reset_supported, .mode2_reset = sienna_cichlid_mode2_reset, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 25fabf336a64..78e4186d06cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -2059,45 +2059,6 @@ int smu_v11_0_set_single_dpm_table(struct smu_context *smu, return 0; } -int smu_v11_0_get_dpm_level_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *min_value, - uint32_t *max_value) -{ - uint32_t level_count = 0; - int ret = 0; - - if (!min_value && !max_value) - return -EINVAL; - - if (min_value) { - /* by default, level 0 clock value as min value */ - ret = smu_v11_0_get_dpm_freq_by_index(smu, - clk_type, - 0, - min_value); - if (ret) - return ret; - } - - if (max_value) { - ret = smu_v11_0_get_dpm_level_count(smu, - clk_type, - &level_count); - if (ret) - return ret; - - ret = smu_v11_0_get_dpm_freq_by_index(smu, - clk_type, - level_count - 1, - max_value); - if (ret) - return ret; - } - - return ret; -} - int smu_v11_0_get_current_pcie_link_width_level(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 83163d7c7f00..6de653d2ed62 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1270,6 +1270,7 @@ static int aldebaran_set_performance_level(struct smu_context *smu, struct smu_13_0_dpm_table *gfx_table = &dpm_context->dpm_tables.gfx_table; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + int r; /* Disable determinism if switching to another mode */ if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) && @@ -1282,7 +1283,11 @@ static int aldebaran_set_performance_level(struct smu_context *smu, case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM: return 0; - + case AMD_DPM_FORCED_LEVEL_AUTO: + r = smu_v13_0_set_performance_level(smu, level); + if (!r) + smu_v13_0_reset_custom_level(smu); + return r; case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_LOW: case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: @@ -1423,7 +1428,11 @@ static int aldebaran_usr_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_ min_clk = dpm_context->dpm_tables.gfx_table.min; max_clk = dpm_context->dpm_tables.gfx_table.max; - return aldebaran_set_soft_freq_limited_range(smu, SMU_GFXCLK, min_clk, max_clk, false); + ret = aldebaran_set_soft_freq_limited_range( + smu, SMU_GFXCLK, min_clk, max_clk, false); + if (ret) + return ret; + smu_v13_0_reset_custom_level(smu); } break; case PP_OD_COMMIT_DPM_TABLE: @@ -1976,11 +1985,6 @@ static bool aldebaran_is_mode1_reset_supported(struct smu_context *smu) return true; } -static bool aldebaran_is_mode2_reset_supported(struct smu_context *smu) -{ - return true; -} - static int aldebaran_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state) { @@ -2086,7 +2090,6 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .get_gpu_metrics = aldebaran_get_gpu_metrics, .mode1_reset_is_support = aldebaran_is_mode1_reset_supported, - .mode2_reset_is_support = aldebaran_is_mode2_reset_supported, .smu_handle_passthrough_sbr = aldebaran_smu_handle_passthrough_sbr, .mode1_reset = aldebaran_mode1_reset, .set_mp1_state = aldebaran_set_mp1_state, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ba5a9012dbd5..a7167668d189 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -709,18 +709,6 @@ int smu_v13_0_notify_memory_pool_location(struct smu_context *smu) return ret; } -int smu_v13_0_set_min_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) -{ - int ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetMinDeepSleepDcefclk, clk, NULL); - if (ret) - dev_err(smu->adev->dev, "SMU13 attempt to set divider for DCEFCLK Failed!"); - - return ret; -} - int smu_v13_0_set_driver_table_location(struct smu_context *smu) { struct smu_table *driver_table = &smu->smu_table.driver_table; @@ -761,18 +749,6 @@ int smu_v13_0_set_tool_table_location(struct smu_context *smu) return ret; } -int smu_v13_0_init_display_count(struct smu_context *smu, uint32_t count) -{ - int ret = 0; - - if (!smu->pm_enabled) - return ret; - - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, count, NULL); - - return ret; -} - int smu_v13_0_set_allowed_mask(struct smu_context *smu) { struct smu_feature *feature = &smu->smu_feature; @@ -1073,56 +1049,6 @@ int smu_v13_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) } -int -smu_v13_0_display_clock_voltage_request(struct smu_context *smu, - struct pp_display_clock_request - *clock_req) -{ - enum amd_pp_clock_type clk_type = clock_req->clock_type; - int ret = 0; - enum smu_clk_type clk_select = 0; - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; - - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) || - smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - switch (clk_type) { - case amd_pp_dcef_clock: - clk_select = SMU_DCEFCLK; - break; - case amd_pp_disp_clock: - clk_select = SMU_DISPCLK; - break; - case amd_pp_pixel_clock: - clk_select = SMU_PIXCLK; - break; - case amd_pp_phy_clock: - clk_select = SMU_PHYCLK; - break; - case amd_pp_mem_clock: - clk_select = SMU_UCLK; - break; - default: - dev_info(smu->adev->dev, "[%s] Invalid Clock Type!", __func__); - ret = -EINVAL; - break; - } - - if (ret) - goto failed; - - if (clk_select == SMU_UCLK && smu->disable_uclk_switch) - return 0; - - ret = smu_v13_0_set_hard_freq_limited_range(smu, clk_select, clk_freq, 0); - - if (clk_select == SMU_UCLK) - smu->hard_min_uclk_req_from_dal = clk_freq; - } - -failed: - return ret; -} - uint32_t smu_v13_0_get_fan_control_mode(struct smu_context *smu) { if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_FAN_CONTROL_BIT)) @@ -1647,45 +1573,6 @@ out: return ret; } -int smu_v13_0_set_hard_freq_limited_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t min, - uint32_t max) -{ - int ret = 0, clk_id = 0; - uint32_t param; - - if (min <= 0 && max <= 0) - return -EINVAL; - - if (!smu_cmn_clk_dpm_is_enabled(smu, clk_type)) - return 0; - - clk_id = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_CLK, - clk_type); - if (clk_id < 0) - return clk_id; - - if (max > 0) { - param = (uint32_t)((clk_id << 16) | (max & 0xffff)); - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMaxByFreq, - param, NULL); - if (ret) - return ret; - } - - if (min > 0) { - param = (uint32_t)((clk_id << 16) | (min & 0xffff)); - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinByFreq, - param, NULL); - if (ret) - return ret; - } - - return ret; -} - int smu_v13_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { @@ -2595,3 +2482,13 @@ int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu, return ret; } + +void smu_v13_0_reset_custom_level(struct smu_context *smu) +{ + struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; + + pstate_table->uclk_pstate.custom.min = 0; + pstate_table->uclk_pstate.custom.max = 0; + pstate_table->gfxclk_pstate.custom.min = 0; + pstate_table->gfxclk_pstate.custom.max = 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 238bd71baa6d..533d58e57d05 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -187,26 +187,6 @@ int smu_v13_0_12_get_max_metrics_size(void) return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t)); } -static int smu_v13_0_12_get_static_metrics_table(struct smu_context *smu) -{ - struct smu_table_context *smu_table = &smu->smu_table; - uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; - struct smu_table *table = &smu_table->driver_table; - int ret; - - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL); - if (ret) { - dev_info(smu->adev->dev, - "Failed to export static metrics table!\n"); - return ret; - } - - amdgpu_asic_invalidate_hdp(smu->adev, NULL); - memcpy(smu_table->metrics_table, table->cpu_addr, table_size); - - return 0; -} - int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; @@ -217,7 +197,7 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) int ret, i; if (!pptable->Init) { - ret = smu_v13_0_12_get_static_metrics_table(smu); + ret = smu_v13_0_6_get_static_metrics_table(smu); if (ret) return ret; @@ -345,8 +325,8 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; u8 num_jpeg_rings_gpu_metrics; @@ -357,7 +337,7 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) metrics = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); memcpy(metrics, smu_table->metrics_table, sizeof(MetricsTable_t)); - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(metrics->MaxSocketTemperature); @@ -474,6 +454,16 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) SMUQ10_ROUND(metrics->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(metrics->GfxBusyAcc[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND(metrics->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index c478b3be37af..7d4ff09be7e8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -101,24 +101,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } -#define SMU_CAP(x) SMU_13_0_6_CAPS_##x - -enum smu_v13_0_6_caps { - SMU_CAP(DPM), - SMU_CAP(DPM_POLICY), - SMU_CAP(OTHER_END_METRICS), - SMU_CAP(SET_UCLK_MAX), - SMU_CAP(PCIE_METRICS), - SMU_CAP(MCA_DEBUG_MODE), - SMU_CAP(PER_INST_METRICS), - SMU_CAP(CTF_LIMIT), - SMU_CAP(RMA_MSG), - SMU_CAP(ACA_SYND), - SMU_CAP(SDMA_RESET), - SMU_CAP(STATIC_METRICS), - SMU_CAP(ALL), -}; - struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -194,6 +176,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), + MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0), }; // clang-format on @@ -299,8 +283,8 @@ static inline void smu_v13_0_6_cap_clear(struct smu_context *smu, dpm_context->caps &= ~BIT_ULL(cap); } -static inline bool smu_v13_0_6_cap_supported(struct smu_context *smu, - enum smu_v13_0_6_caps cap) +bool smu_v13_0_6_cap_supported(struct smu_context *smu, + enum smu_v13_0_6_caps cap) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; @@ -353,6 +337,9 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver >= 0x00561E00) smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + + if (fw_ver >= 0x00562500) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); } static void smu_v13_0_6_init_caps(struct smu_context *smu) @@ -402,6 +389,13 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); if (fw_ver < 0x00555600) smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); + if ((pgm == 7 && fw_ver >= 0x7550E00) || + (pgm == 0 && fw_ver >= 0x00557E00)) + smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + if (fw_ver >= 0x00557F01) { + smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + } } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || @@ -525,7 +519,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -747,9 +741,43 @@ static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, return pm_metrics->common_header.structure_size; } +static void smu_v13_0_6_fill_static_metrics_table(struct smu_context *smu, + StaticMetricsTable_t *static_metrics) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + + if (!static_metrics->InputTelemetryVoltageInmV) { + dev_warn(smu->adev->dev, "Invalid board voltage %d\n", + static_metrics->InputTelemetryVoltageInmV); + } + + dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV; +} + +int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; + struct smu_table *table = &smu_table->driver_table; + int ret; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL); + if (ret) { + dev_info(smu->adev->dev, + "Failed to export static metrics table!\n"); + return ret; + } + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + memcpy(smu_table->metrics_table, table->cpu_addr, table_size); + + return 0; +} + static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; + StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table; MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; @@ -759,7 +787,8 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) int ret, i, retry = 100; uint32_t table_version; - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return smu_v13_0_12_setup_driver_pptable(smu); /* Store one-time values in driver PPTable */ @@ -813,6 +842,12 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; pptable->Init = true; + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { + ret = smu_v13_0_6_get_static_metrics_table(smu); + if (ret) + return ret; + smu_v13_0_6_fill_static_metrics_table(smu, static_metrics); + } } return 0; @@ -1142,7 +1177,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, if (ret) return ret; - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return smu_v13_0_12_get_smu_metrics_data(smu, member, value); /* For clocks with multiple instances, only report the first one */ @@ -1616,6 +1652,7 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; int ret = 0; if (amdgpu_ras_intr_triggered()) @@ -1660,6 +1697,15 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VDDBOARD: + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) { + *(uint32_t *)data = dpm_context->board_volt; + *size = 4; + break; + } else { + ret = -EOPNOTSUPP; + break; + } case AMDGPU_PP_SENSOR_GPU_AVG_POWER: default: ret = -EOPNOTSUPP; @@ -1927,7 +1973,7 @@ static int smu_v13_0_6_set_performance_level(struct smu_context *smu, return ret; pstate_table->uclk_pstate.curr.max = uclk_table->max; } - pstate_table->uclk_pstate.custom.max = 0; + smu_v13_0_reset_custom_level(smu); return 0; case AMD_DPM_FORCED_LEVEL_MANUAL: @@ -2140,7 +2186,7 @@ static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, smu, SMU_UCLK, min_clk, max_clk, false); if (ret) return ret; - pstate_table->uclk_pstate.custom.max = 0; + smu_v13_0_reset_custom_level(smu); } break; case PP_OD_COMMIT_DPM_TABLE: @@ -2486,8 +2532,8 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_7 *gpu_metrics = - (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_8 *gpu_metrics = + (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; @@ -2507,13 +2553,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table return ret; } - if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) return smu_v13_0_12_get_gpu_metrics(smu, table); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 8); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); @@ -2666,6 +2713,20 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusyAcc, version)[inst]); + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(HST_LIMIT_METRICS))) { + gpu_metrics->xcp_stats[i].gfx_below_host_limit_ppt_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitPptAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_thm_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitThmAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_low_utilization_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkLowUtilizationAcc[inst]); + gpu_metrics->xcp_stats[i].gfx_below_host_limit_total_acc[idx] = + SMUQ10_ROUND + (metrics_v0->GfxclkBelowHostLimitTotalAcc[inst]); + } idx++; } } @@ -2844,14 +2905,29 @@ static int smu_v13_0_6_mode1_reset(struct smu_context *smu) return ret; } +static int smu_v13_0_6_link_reset(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, + SMU_RESET_MODE_4, NULL); + return ret; +} + static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu) { return true; } -static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu) +static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) { - return true; + struct amdgpu_device *adev = smu->adev; + int var = (adev->pdev->device & 0xF); + + if (var == 0x1) + return true; + + return false; } static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, @@ -2924,6 +3000,19 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL); + if (ret) + dev_err(smu->adev->dev, + "failed to send ResetVCN event with mask 0x%x\n", + inst_mask); + return ret; +} + + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3586,9 +3675,10 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_pm_metrics = smu_v13_0_6_get_pm_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, - .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported, + .link_reset_is_support = smu_v13_0_6_is_link_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, .mode2_reset = smu_v13_0_6_mode2_reset, + .link_reset = smu_v13_0_6_link_reset, .wait_for_event = smu_v13_0_wait_for_event, .i2c_init = smu_v13_0_6_i2c_control_init, .i2c_fini = smu_v13_0_6_i2c_control_fini, @@ -3596,6 +3686,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .send_rma_reason = smu_v13_0_6_send_rma_reason, .reset_sdma = smu_v13_0_6_reset_sdma, .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, + .dpm_reset_vcn = smu_v13_0_6_reset_vcn, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index 83745909e564..d151bcd0cca7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -26,6 +26,7 @@ #define SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL 0x2 #define SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL 0x4 #define SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL 0x2 +#define SMU_CAP(x) SMU_13_0_6_CAPS_##x typedef enum { /*0*/ METRICS_VERSION_V0 = 0, @@ -51,6 +52,34 @@ struct PPTable_t { bool Init; }; -extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +enum smu_v13_0_6_caps { + SMU_CAP(DPM), + SMU_CAP(DPM_POLICY), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(SET_UCLK_MAX), + SMU_CAP(PCIE_METRICS), + SMU_CAP(MCA_DEBUG_MODE), + SMU_CAP(PER_INST_METRICS), + SMU_CAP(CTF_LIMIT), + SMU_CAP(RMA_MSG), + SMU_CAP(ACA_SYND), + SMU_CAP(SDMA_RESET), + SMU_CAP(STATIC_METRICS), + SMU_CAP(HST_LIMIT_METRICS), + SMU_CAP(BOARD_VOLTAGE), + SMU_CAP(ALL), +}; +extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); +bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); +int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); + +bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); +int smu_v13_0_12_get_max_metrics_size(void); +int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); +int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, uint32_t *value); +ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table); +extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; +extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; #endif diff --git a/drivers/gpu/drm/ast/ast_cursor.c b/drivers/gpu/drm/ast/ast_cursor.c index 139ab00dee8f..2d3ad7610c2e 100644 --- a/drivers/gpu/drm/ast/ast_cursor.c +++ b/drivers/gpu/drm/ast/ast_cursor.c @@ -37,6 +37,7 @@ */ /* define for signature structure */ +#define AST_HWC_SIGNATURE_SIZE SZ_32 #define AST_HWC_SIGNATURE_CHECKSUM 0x00 #define AST_HWC_SIGNATURE_SizeX 0x04 #define AST_HWC_SIGNATURE_SizeY 0x08 @@ -45,6 +46,21 @@ #define AST_HWC_SIGNATURE_HOTSPOTX 0x14 #define AST_HWC_SIGNATURE_HOTSPOTY 0x18 +static unsigned long ast_cursor_vram_size(void) +{ + return AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE; +} + +long ast_cursor_vram_offset(struct ast_device *ast) +{ + unsigned long size = ast_cursor_vram_size(); + + if (size > ast->vram_size) + return -EINVAL; + + return ALIGN_DOWN(ast->vram_size - size, SZ_8); +} + static u32 ast_cursor_calculate_checksum(const void *src, unsigned int width, unsigned int height) { u32 csum = 0; @@ -75,7 +91,7 @@ static u32 ast_cursor_calculate_checksum(const void *src, unsigned int width, un static void ast_set_cursor_image(struct ast_device *ast, const u8 *src, unsigned int width, unsigned int height) { - u8 __iomem *dst = ast->cursor_plane.base.vaddr; + u8 __iomem *dst = ast_plane_vaddr(&ast->cursor_plane.base); u32 csum; csum = ast_cursor_calculate_checksum(src, width, height); @@ -177,7 +193,7 @@ static void ast_cursor_plane_helper_atomic_update(struct drm_plane *plane, struct ast_device *ast = to_ast_device(plane->dev); struct drm_rect damage; u64 dst_off = ast_plane->offset; - u8 __iomem *dst = ast_plane->vaddr; /* TODO: Use mapping abstraction properly */ + u8 __iomem *dst = ast_plane_vaddr(ast_plane); /* TODO: Use mapping abstraction properly */ u8 __iomem *sig = dst + AST_HWC_SIZE; /* TODO: Use mapping abstraction properly */ unsigned int offset_x, offset_y; u16 x, y; @@ -274,25 +290,16 @@ int ast_cursor_plane_init(struct ast_device *ast) struct ast_cursor_plane *ast_cursor_plane = &ast->cursor_plane; struct ast_plane *ast_plane = &ast_cursor_plane->base; struct drm_plane *cursor_plane = &ast_plane->base; - size_t size; - void __iomem *vaddr; - u64 offset; + unsigned long size; + long offset; int ret; - /* - * Allocate backing storage for cursors. The BOs are permanently - * pinned to the top end of the VRAM. - */ + size = ast_cursor_vram_size(); + offset = ast_cursor_vram_offset(ast); + if (offset < 0) + return offset; - size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE); - - if (ast->vram_fb_available < size) - return -ENOMEM; - - vaddr = ast->vram + ast->vram_fb_available - size; - offset = ast->vram_fb_available - size; - - ret = ast_plane_init(dev, ast_plane, vaddr, offset, size, + ret = ast_plane_init(dev, ast_plane, offset, size, 0x01, &ast_cursor_plane_funcs, ast_cursor_plane_formats, ARRAY_SIZE(ast_cursor_plane_formats), NULL, DRM_PLANE_TYPE_CURSOR); @@ -303,7 +310,5 @@ int ast_cursor_plane_init(struct ast_device *ast) drm_plane_helper_add(cursor_plane, &ast_cursor_plane_helper_funcs); drm_plane_enable_fb_damage_clips(cursor_plane); - ast->vram_fb_available -= size; - return 0; } diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index d2c2605d2728..2ee402096cd9 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -112,12 +112,9 @@ enum ast_config_mode { #define AST_MAX_HWC_WIDTH 64 #define AST_MAX_HWC_HEIGHT 64 - #define AST_HWC_PITCH (AST_MAX_HWC_WIDTH * SZ_2) #define AST_HWC_SIZE (AST_MAX_HWC_HEIGHT * AST_HWC_PITCH) -#define AST_HWC_SIGNATURE_SIZE 32 - /* * Planes */ @@ -125,7 +122,6 @@ enum ast_config_mode { struct ast_plane { struct drm_plane base; - void __iomem *vaddr; u64 offset; unsigned long size; }; @@ -183,7 +179,6 @@ struct ast_device { void __iomem *vram; unsigned long vram_base; unsigned long vram_size; - unsigned long vram_fb_available; struct mutex modeset_lock; /* Protects access to modeset I/O registers in ioregs */ @@ -340,14 +335,6 @@ static inline void ast_set_index_reg_mask(struct ast_device *ast, u32 base, u8 i __ast_write8_i_masked(ast->ioregs, base, index, preserve_mask, val); } -#define AST_VIDMEM_SIZE_8M 0x00800000 -#define AST_VIDMEM_SIZE_16M 0x01000000 -#define AST_VIDMEM_SIZE_32M 0x02000000 -#define AST_VIDMEM_SIZE_64M 0x04000000 -#define AST_VIDMEM_SIZE_128M 0x08000000 - -#define AST_VIDMEM_DEFAULT_SIZE AST_VIDMEM_SIZE_8M - struct ast_vbios_stdtable { u8 misc; u8 seq[4]; @@ -440,6 +427,7 @@ int ast_vga_output_init(struct ast_device *ast); int ast_sil164_output_init(struct ast_device *ast); /* ast_cursor.c */ +long ast_cursor_vram_offset(struct ast_device *ast); int ast_cursor_plane_init(struct ast_device *ast); /* ast dp501 */ @@ -454,11 +442,12 @@ int ast_astdp_output_init(struct ast_device *ast); /* ast_mode.c */ int ast_mode_config_init(struct ast_device *ast); int ast_plane_init(struct drm_device *dev, struct ast_plane *ast_plane, - void __iomem *vaddr, u64 offset, unsigned long size, + u64 offset, unsigned long size, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type); +void __iomem *ast_plane_vaddr(struct ast_plane *ast); #endif diff --git a/drivers/gpu/drm/ast/ast_mm.c b/drivers/gpu/drm/ast/ast_mm.c index 6dfe6d9777d4..0bc140319464 100644 --- a/drivers/gpu/drm/ast/ast_mm.c +++ b/drivers/gpu/drm/ast/ast_mm.c @@ -35,36 +35,35 @@ static u32 ast_get_vram_size(struct ast_device *ast) { - u8 jreg; u32 vram_size; + u8 vgacr99, vgacraa; - vram_size = AST_VIDMEM_DEFAULT_SIZE; - jreg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xaa, 0xff); - switch (jreg & 3) { + vgacraa = ast_get_index_reg(ast, AST_IO_VGACRI, 0xaa); + switch (vgacraa & AST_IO_VGACRAA_VGAMEM_SIZE_MASK) { case 0: - vram_size = AST_VIDMEM_SIZE_8M; + vram_size = SZ_8M; break; case 1: - vram_size = AST_VIDMEM_SIZE_16M; + vram_size = SZ_16M; break; case 2: - vram_size = AST_VIDMEM_SIZE_32M; + vram_size = SZ_32M; break; case 3: - vram_size = AST_VIDMEM_SIZE_64M; + vram_size = SZ_64M; break; } - jreg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0x99, 0xff); - switch (jreg & 0x03) { + vgacr99 = ast_get_index_reg(ast, AST_IO_VGACRI, 0x99); + switch (vgacr99 & AST_IO_VGACR99_VGAMEM_RSRV_MASK) { case 1: - vram_size -= 0x100000; + vram_size -= SZ_1M; break; case 2: - vram_size -= 0x200000; + vram_size -= SZ_2M; break; case 3: - vram_size -= 0x400000; + vram_size -= SZ_4M; break; } @@ -93,7 +92,6 @@ int ast_mm_init(struct ast_device *ast) ast->vram_base = base; ast->vram_size = vram_size; - ast->vram_fb_available = vram_size; return 0; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c3b950675485..1de832964e92 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -51,6 +51,26 @@ #define AST_LUT_SIZE 256 +#define AST_PRIMARY_PLANE_MAX_OFFSET (BIT(16) - 1) + +static unsigned long ast_fb_vram_offset(void) +{ + return 0; // with shmem, the primary plane is always at offset 0 +} + +static unsigned long ast_fb_vram_size(struct ast_device *ast) +{ + struct drm_device *dev = &ast->base; + unsigned long offset = ast_fb_vram_offset(); // starts at offset + long cursor_offset = ast_cursor_vram_offset(ast); // ends at cursor offset + + if (cursor_offset < 0) + cursor_offset = ast->vram_size; // no cursor; it's all ours + if (drm_WARN_ON_ONCE(dev, offset > cursor_offset)) + return 0; // cannot legally happen; signal error + return cursor_offset - offset; +} + static inline void ast_load_palette_index(struct ast_device *ast, u8 index, u8 red, u8 green, u8 blue) @@ -439,7 +459,7 @@ static void ast_wait_for_vretrace(struct ast_device *ast) */ int ast_plane_init(struct drm_device *dev, struct ast_plane *ast_plane, - void __iomem *vaddr, u64 offset, unsigned long size, + u64 offset, unsigned long size, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, @@ -448,7 +468,6 @@ int ast_plane_init(struct drm_device *dev, struct ast_plane *ast_plane, { struct drm_plane *plane = &ast_plane->base; - ast_plane->vaddr = vaddr; ast_plane->offset = offset; ast_plane->size = size; @@ -457,6 +476,13 @@ int ast_plane_init(struct drm_device *dev, struct ast_plane *ast_plane, type, NULL); } +void __iomem *ast_plane_vaddr(struct ast_plane *ast_plane) +{ + struct ast_device *ast = to_ast_device(ast_plane->base.dev); + + return ast->vram + ast_plane->offset; +} + /* * Primary plane */ @@ -503,7 +529,7 @@ static void ast_handle_damage(struct ast_plane *ast_plane, struct iosys_map *src struct drm_framebuffer *fb, const struct drm_rect *clip) { - struct iosys_map dst = IOSYS_MAP_INIT_VADDR_IOMEM(ast_plane->vaddr); + struct iosys_map dst = IOSYS_MAP_INIT_VADDR_IOMEM(ast_plane_vaddr(ast_plane)); iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, clip)); drm_fb_memcpy(&dst, fb->pitches, src, fb, clip); @@ -576,12 +602,12 @@ static int ast_primary_plane_helper_get_scanout_buffer(struct drm_plane *plane, { struct ast_plane *ast_plane = to_ast_plane(plane); - if (plane->state && plane->state->fb && ast_plane->vaddr) { + if (plane->state && plane->state->fb) { sb->format = plane->state->fb->format; sb->width = plane->state->fb->width; sb->height = plane->state->fb->height; sb->pitch[0] = plane->state->fb->pitches[0]; - iosys_map_set_vaddr_iomem(&sb->map[0], ast_plane->vaddr); + iosys_map_set_vaddr_iomem(&sb->map[0], ast_plane_vaddr(ast_plane)); return 0; } return -ENODEV; @@ -608,13 +634,11 @@ static int ast_primary_plane_init(struct ast_device *ast) struct drm_device *dev = &ast->base; struct ast_plane *ast_primary_plane = &ast->primary_plane; struct drm_plane *primary_plane = &ast_primary_plane->base; - void __iomem *vaddr = ast->vram; - u64 offset = 0; /* with shmem, the primary plane is always at offset 0 */ - unsigned long cursor_size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE); - unsigned long size = ast->vram_fb_available - cursor_size; + u64 offset = ast_fb_vram_offset(); + unsigned long size = ast_fb_vram_size(ast); int ret; - ret = ast_plane_init(dev, ast_primary_plane, vaddr, offset, size, + ret = ast_plane_init(dev, ast_primary_plane, offset, size, 0x01, &ast_primary_plane_funcs, ast_primary_plane_formats, ARRAY_SIZE(ast_primary_plane_formats), NULL, DRM_PLANE_TYPE_PRIMARY); @@ -922,9 +946,9 @@ static void ast_mode_config_helper_atomic_commit_tail(struct drm_atomic_state *s /* * Concurrent operations could possibly trigger a call to - * drm_connector_helper_funcs.get_modes by trying to read the - * display modes. Protect access to I/O registers by acquiring - * the I/O-register lock. Released in atomic_flush(). + * drm_connector_helper_funcs.get_modes by reading the display + * modes. Protect access to registers by acquiring the modeset + * lock. */ mutex_lock(&ast->modeset_lock); drm_atomic_helper_commit_tail(state); @@ -938,16 +962,20 @@ static const struct drm_mode_config_helper_funcs ast_mode_config_helper_funcs = static enum drm_mode_status ast_mode_config_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode) { - static const unsigned long max_bpp = 4; /* DRM_FORMAT_XRGB8888 */ + const struct drm_format_info *info = drm_format_info(DRM_FORMAT_XRGB8888); struct ast_device *ast = to_ast_device(dev); - unsigned long fbsize, fbpages, max_fbpages; + unsigned long max_fb_size = ast_fb_vram_size(ast); + u64 pitch; - max_fbpages = (ast->vram_fb_available) >> PAGE_SHIFT; + if (drm_WARN_ON_ONCE(dev, !info)) + return MODE_ERROR; /* driver bug */ - fbsize = mode->hdisplay * mode->vdisplay * max_bpp; - fbpages = DIV_ROUND_UP(fbsize, PAGE_SIZE); - - if (fbpages > max_fbpages) + pitch = drm_format_info_min_pitch(info, 0, mode->hdisplay); + if (!pitch) + return MODE_BAD_WIDTH; + if (pitch > AST_PRIMARY_PLANE_MAX_OFFSET) + return MODE_BAD_WIDTH; /* maximum programmable pitch */ + if (pitch > max_fb_size / mode->vdisplay) return MODE_MEM; return MODE_OK; @@ -1018,10 +1046,7 @@ int ast_mode_config_init(struct ast_device *ast) return ret; drm_mode_config_reset(dev); - - ret = drmm_kms_helper_poll_init(dev); - if (ret) - return ret; + drmm_kms_helper_poll_init(dev); return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 91e85e457bdf..37568cf3822c 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -1075,16 +1075,16 @@ static void get_ddr3_info(struct ast_device *ast, struct ast2300_dram_param *par switch (param->vram_size) { default: - case AST_VIDMEM_SIZE_8M: + case SZ_8M: param->dram_config |= 0x00; break; - case AST_VIDMEM_SIZE_16M: + case SZ_16M: param->dram_config |= 0x04; break; - case AST_VIDMEM_SIZE_32M: + case SZ_32M: param->dram_config |= 0x08; break; - case AST_VIDMEM_SIZE_64M: + case SZ_64M: param->dram_config |= 0x0c; break; } @@ -1446,16 +1446,16 @@ static void get_ddr2_info(struct ast_device *ast, struct ast2300_dram_param *par switch (param->vram_size) { default: - case AST_VIDMEM_SIZE_8M: + case SZ_8M: param->dram_config |= 0x00; break; - case AST_VIDMEM_SIZE_16M: + case SZ_16M: param->dram_config |= 0x04; break; - case AST_VIDMEM_SIZE_32M: + case SZ_32M: param->dram_config |= 0x08; break; - case AST_VIDMEM_SIZE_64M: + case SZ_64M: param->dram_config |= 0x0c; break; } @@ -1635,19 +1635,19 @@ static void ast_post_chip_2300(struct ast_device *ast) switch (temp & 0x0c) { default: case 0x00: - param.vram_size = AST_VIDMEM_SIZE_8M; + param.vram_size = SZ_8M; break; case 0x04: - param.vram_size = AST_VIDMEM_SIZE_16M; + param.vram_size = SZ_16M; break; case 0x08: - param.vram_size = AST_VIDMEM_SIZE_32M; + param.vram_size = SZ_32M; break; case 0x0c: - param.vram_size = AST_VIDMEM_SIZE_64M; + param.vram_size = SZ_64M; break; } diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index bb2cc1d8b84e..e15adaf3a80e 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -30,9 +30,11 @@ #define AST_IO_VGACRI (0x54) #define AST_IO_VGACR80_PASSWORD (0xa8) +#define AST_IO_VGACR99_VGAMEM_RSRV_MASK GENMASK(1, 0) #define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) #define AST_IO_VGACRA1_MMIO_ENABLED BIT(2) #define AST_IO_VGACRA3_DVO_ENABLED BIT(7) +#define AST_IO_VGACRAA_VGAMEM_SIZE_MASK GENMASK(1, 0) #define AST_IO_VGACRB6_HSYNC_OFF BIT(0) #define AST_IO_VGACRB6_VSYNC_OFF BIT(1) #define AST_IO_VGACRCB_HWC_16BPP BIT(0) /* set: ARGB4444, cleared: 2bpp palette */ diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 09a1be234f71..b9e0ca85226a 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -16,6 +16,7 @@ config DRM_AUX_BRIDGE tristate depends on DRM_BRIDGE && OF select AUXILIARY_BUS + select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help Simple transparent bridge that is used by several non-DRM drivers to diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 050dae338ffe..1257009e850c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -948,13 +948,14 @@ static enum drm_mode_status adv7511_bridge_mode_valid(struct drm_bridge *bridge, } static int adv7511_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct adv7511 *adv = bridge_to_adv7511(bridge); int ret = 0; if (adv->next_bridge) { - ret = drm_bridge_attach(bridge->encoder, adv->next_bridge, bridge, + ret = drm_bridge_attach(encoder, adv->next_bridge, bridge, flags | DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) return ret; diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c index 83d711ee3a2e..f3fe47b12edc 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c +++ b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c @@ -143,35 +143,7 @@ static int anx6345_dp_link_training(struct anx6345 *anx6345) if (err) return err; - /* - * Power up the sink (DP_SET_POWER register is only available on DPCD - * v1.1 and later). - */ - if (anx6345->dpcd[DP_DPCD_REV] >= 0x11) { - err = drm_dp_dpcd_readb(&anx6345->aux, DP_SET_POWER, &dpcd[0]); - if (err < 0) { - DRM_ERROR("Failed to read DP_SET_POWER register: %d\n", - err); - return err; - } - - dpcd[0] &= ~DP_SET_POWER_MASK; - dpcd[0] |= DP_SET_POWER_D0; - - err = drm_dp_dpcd_writeb(&anx6345->aux, DP_SET_POWER, dpcd[0]); - if (err < 0) { - DRM_ERROR("Failed to power up DisplayPort link: %d\n", - err); - return err; - } - - /* - * According to the DP 1.1 specification, a "Sink Device must - * exit the power saving state within 1 ms" (Section 2.5.3.1, - * Table 5-52, "Sink Control Field" (register 0x600). - */ - usleep_range(1000, 2000); - } + drm_dp_link_power_up(&anx6345->aux, anx6345->dpcd[DP_DPCD_REV]); /* Possibly enable downspread on the sink */ err = regmap_write(anx6345->map[I2C_IDX_DPTX], @@ -517,6 +489,7 @@ static const struct drm_connector_funcs anx6345_connector_funcs = { }; static int anx6345_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct anx6345 *anx6345 = bridge_to_anx6345(bridge); @@ -553,7 +526,7 @@ static int anx6345_bridge_attach(struct drm_bridge *bridge, anx6345->connector.polled = DRM_CONNECTOR_POLL_HPD; err = drm_connector_attach_encoder(&anx6345->connector, - bridge->encoder); + encoder); if (err) { DRM_ERROR("Failed to link up connector to encoder: %d\n", err); goto connector_cleanup; @@ -691,9 +664,10 @@ static int anx6345_i2c_probe(struct i2c_client *client) struct device *dev; int i, err; - anx6345 = devm_kzalloc(&client->dev, sizeof(*anx6345), GFP_KERNEL); - if (!anx6345) - return -ENOMEM; + anx6345 = devm_drm_bridge_alloc(&client->dev, struct anx6345, bridge, + &anx6345_bridge_funcs); + if (IS_ERR(anx6345)) + return PTR_ERR(anx6345); mutex_init(&anx6345->lock); @@ -765,7 +739,6 @@ static int anx6345_i2c_probe(struct i2c_client *client) /* Look for supported chip ID */ anx6345_poweron(anx6345); if (anx6345_get_chip_id(anx6345)) { - anx6345->bridge.funcs = &anx6345_bridge_funcs; drm_bridge_add(&anx6345->bridge); return 0; diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c index f74694bb9c50..a83020d6576f 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c +++ b/drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c @@ -656,35 +656,7 @@ static int anx78xx_dp_link_training(struct anx78xx *anx78xx) if (err) return err; - /* - * Power up the sink (DP_SET_POWER register is only available on DPCD - * v1.1 and later). - */ - if (anx78xx->dpcd[DP_DPCD_REV] >= 0x11) { - err = drm_dp_dpcd_readb(&anx78xx->aux, DP_SET_POWER, &dpcd[0]); - if (err < 0) { - DRM_ERROR("Failed to read DP_SET_POWER register: %d\n", - err); - return err; - } - - dpcd[0] &= ~DP_SET_POWER_MASK; - dpcd[0] |= DP_SET_POWER_D0; - - err = drm_dp_dpcd_writeb(&anx78xx->aux, DP_SET_POWER, dpcd[0]); - if (err < 0) { - DRM_ERROR("Failed to power up DisplayPort link: %d\n", - err); - return err; - } - - /* - * According to the DP 1.1 specification, a "Sink Device must - * exit the power saving state within 1 ms" (Section 2.5.3.1, - * Table 5-52, "Sink Control Field" (register 0x600). - */ - usleep_range(1000, 2000); - } + drm_dp_link_power_up(&anx78xx->aux, anx78xx->dpcd[DP_DPCD_REV]); /* Possibly enable downspread on the sink */ err = regmap_write(anx78xx->map[I2C_IDX_TX_P0], @@ -888,6 +860,7 @@ static const struct drm_connector_funcs anx78xx_connector_funcs = { }; static int anx78xx_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct anx78xx *anx78xx = bridge_to_anx78xx(bridge); @@ -924,7 +897,7 @@ static int anx78xx_bridge_attach(struct drm_bridge *bridge, anx78xx->connector.polled = DRM_CONNECTOR_POLL_HPD; err = drm_connector_attach_encoder(&anx78xx->connector, - bridge->encoder); + encoder); if (err) { DRM_ERROR("Failed to link up connector to encoder: %d\n", err); goto connector_cleanup; diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 071168aa0c3b..a761941bc3c2 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -838,10 +838,7 @@ static int analogix_dp_commit(struct analogix_dp_device *dp) int ret; /* Keep the panel disabled while we configure video */ - if (dp->plat_data->panel) { - if (drm_panel_disable(dp->plat_data->panel)) - DRM_ERROR("failed to disable the panel\n"); - } + drm_panel_disable(dp->plat_data->panel); ret = analogix_dp_train_link(dp); if (ret) { @@ -863,13 +860,7 @@ static int analogix_dp_commit(struct analogix_dp_device *dp) } /* Safe to enable the panel now */ - if (dp->plat_data->panel) { - ret = drm_panel_enable(dp->plat_data->panel); - if (ret) { - DRM_ERROR("failed to enable the panel\n"); - return ret; - } - } + drm_panel_enable(dp->plat_data->panel); /* Check whether panel supports fast training */ ret = analogix_dp_fast_link_train_detection(dp); @@ -955,67 +946,15 @@ static int analogix_dp_disable_psr(struct analogix_dp_device *dp) return analogix_dp_send_psr_spd(dp, &psr_vsc, true); } -/* - * This function is a bit of a catch-all for panel preparation, hopefully - * simplifying the logic of functions that need to prepare/unprepare the panel - * below. - * - * If @prepare is true, this function will prepare the panel. Conversely, if it - * is false, the panel will be unprepared. - * - * If @is_modeset_prepare is true, the function will disregard the current state - * of the panel and either prepare/unprepare the panel based on @prepare. Once - * it finishes, it will update dp->panel_is_modeset to reflect the current state - * of the panel. - */ -static int analogix_dp_prepare_panel(struct analogix_dp_device *dp, - bool prepare, bool is_modeset_prepare) -{ - int ret = 0; - - if (!dp->plat_data->panel) - return 0; - - mutex_lock(&dp->panel_lock); - - /* - * Exit early if this is a temporary prepare/unprepare and we're already - * modeset (since we neither want to prepare twice or unprepare early). - */ - if (dp->panel_is_modeset && !is_modeset_prepare) - goto out; - - if (prepare) - ret = drm_panel_prepare(dp->plat_data->panel); - else - ret = drm_panel_unprepare(dp->plat_data->panel); - - if (ret) - goto out; - - if (is_modeset_prepare) - dp->panel_is_modeset = prepare; - -out: - mutex_unlock(&dp->panel_lock); - return ret; -} - static int analogix_dp_get_modes(struct drm_connector *connector) { struct analogix_dp_device *dp = to_dp(connector); const struct drm_edid *drm_edid; - int ret, num_modes = 0; + int num_modes = 0; if (dp->plat_data->panel) { num_modes += drm_panel_get_modes(dp->plat_data->panel, connector); } else { - ret = analogix_dp_prepare_panel(dp, true, false); - if (ret) { - DRM_ERROR("Failed to prepare panel (%d)\n", ret); - return 0; - } - drm_edid = drm_edid_read_ddc(connector, &dp->aux.ddc); drm_edid_connector_update(&dp->connector, drm_edid); @@ -1024,10 +963,6 @@ static int analogix_dp_get_modes(struct drm_connector *connector) num_modes += drm_edid_connector_add_modes(&dp->connector); drm_edid_free(drm_edid); } - - ret = analogix_dp_prepare_panel(dp, false, false); - if (ret) - DRM_ERROR("Failed to unprepare panel (%d)\n", ret); } if (dp->plat_data->get_modes) @@ -1082,24 +1017,13 @@ analogix_dp_detect(struct drm_connector *connector, bool force) { struct analogix_dp_device *dp = to_dp(connector); enum drm_connector_status status = connector_status_disconnected; - int ret; if (dp->plat_data->panel) return connector_status_connected; - ret = analogix_dp_prepare_panel(dp, true, false); - if (ret) { - DRM_ERROR("Failed to prepare panel (%d)\n", ret); - return connector_status_disconnected; - } - if (!analogix_dp_detect_hpd(dp)) status = connector_status_connected; - ret = analogix_dp_prepare_panel(dp, false, false); - if (ret) - DRM_ERROR("Failed to unprepare panel (%d)\n", ret); - return status; } @@ -1113,10 +1037,10 @@ static const struct drm_connector_funcs analogix_dp_connector_funcs = { }; static int analogix_dp_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct analogix_dp_device *dp = bridge->driver_private; - struct drm_encoder *encoder = dp->encoder; struct drm_connector *connector = NULL; int ret = 0; @@ -1203,7 +1127,6 @@ static void analogix_dp_bridge_atomic_pre_enable(struct drm_bridge *bridge, struct analogix_dp_device *dp = bridge->driver_private; struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state; - int ret; crtc = analogix_dp_get_new_crtc(dp, old_state); if (!crtc) @@ -1214,9 +1137,7 @@ static void analogix_dp_bridge_atomic_pre_enable(struct drm_bridge *bridge, if (old_crtc_state && old_crtc_state->self_refresh_active) return; - ret = analogix_dp_prepare_panel(dp, true, true); - if (ret) - DRM_ERROR("failed to setup the panel ret = %d\n", ret); + drm_panel_prepare(dp->plat_data->panel); } static int analogix_dp_set_bridge(struct analogix_dp_device *dp) @@ -1296,17 +1217,11 @@ static void analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, static void analogix_dp_bridge_disable(struct drm_bridge *bridge) { struct analogix_dp_device *dp = bridge->driver_private; - int ret; if (dp->dpms_mode != DRM_MODE_DPMS_ON) return; - if (dp->plat_data->panel) { - if (drm_panel_disable(dp->plat_data->panel)) { - DRM_ERROR("failed to disable the panel\n"); - return; - } - } + drm_panel_disable(dp->plat_data->panel); disable_irq(dp->irq); @@ -1314,9 +1229,7 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) pm_runtime_put_sync(dp->dev); - ret = analogix_dp_prepare_panel(dp, false, true); - if (ret) - DRM_ERROR("failed to setup the panel ret = %d\n", ret); + drm_panel_unprepare(dp->plat_data->panel); dp->fast_train_enable = false; dp->psr_supported = false; @@ -1505,6 +1418,10 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) video_info->max_link_rate = 0x0A; video_info->max_lane_count = 0x04; break; + case RK3588_EDP: + video_info->max_link_rate = 0x14; + video_info->max_lane_count = 0x04; + break; case EXYNOS_DP: /* * NOTE: those property parseing code is used for @@ -1540,6 +1457,26 @@ out: return ret; } +static int analogix_dpaux_wait_hpd_asserted(struct drm_dp_aux *aux, unsigned long wait_us) +{ + struct analogix_dp_device *dp = to_dp(aux); + int val; + int ret; + + if (dp->force_hpd) + return 0; + + pm_runtime_get_sync(dp->dev); + + ret = readx_poll_timeout(analogix_dp_get_plug_in_status, dp, val, !val, + wait_us / 100, wait_us); + + pm_runtime_mark_last_busy(dp->dev); + pm_runtime_put_autosuspend(dp->dev); + + return ret; +} + struct analogix_dp_device * analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) { @@ -1560,9 +1497,6 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) dp->dev = &pdev->dev; dp->dpms_mode = DRM_MODE_DPMS_OFF; - mutex_init(&dp->panel_lock); - dp->panel_is_modeset = false; - /* * platform dp driver need containor_of the plat_data to get * the driver private data, so we need to store the point of @@ -1625,10 +1559,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) * that we can get the current state of the GPIO. */ dp->irq = gpiod_to_irq(dp->hpd_gpiod); - irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; + irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_NO_AUTOEN; } else { dp->irq = platform_get_irq(pdev, 0); - irq_flags = 0; + irq_flags = IRQF_NO_AUTOEN; } if (dp->irq == -ENXIO) { @@ -1645,7 +1579,18 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) dev_err(&pdev->dev, "failed to request irq\n"); goto err_disable_clk; } - disable_irq(dp->irq); + + dp->aux.name = "DP-AUX"; + dp->aux.transfer = analogix_dpaux_transfer; + dp->aux.wait_hpd_asserted = analogix_dpaux_wait_hpd_asserted; + dp->aux.dev = dp->dev; + drm_dp_aux_init(&dp->aux); + + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); + ret = devm_pm_runtime_enable(dp->dev); + if (ret) + goto err_disable_clk; return dp; @@ -1681,6 +1626,7 @@ int analogix_dp_resume(struct analogix_dp_device *dp) if (dp->plat_data->power_on) dp->plat_data->power_on(dp->plat_data); + phy_set_mode(dp->phy, PHY_MODE_DP); phy_power_on(dp->phy); analogix_dp_init_dp(dp); @@ -1696,25 +1642,12 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) dp->drm_dev = drm_dev; dp->encoder = dp->plat_data->encoder; - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_use_autosuspend(dp->dev); - pm_runtime_set_autosuspend_delay(dp->dev, 100); - pm_runtime_enable(dp->dev); - } else { - ret = analogix_dp_resume(dp); - if (ret) - return ret; - } - - dp->aux.name = "DP-AUX"; - dp->aux.transfer = analogix_dpaux_transfer; - dp->aux.dev = dp->dev; dp->aux.drm_dev = drm_dev; ret = drm_dp_aux_register(&dp->aux); if (ret) { DRM_ERROR("failed to register AUX (%d)\n", ret); - goto err_disable_pm_runtime; + return ret; } ret = analogix_dp_create_bridge(drm_dev, dp); @@ -1727,13 +1660,6 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) err_unregister_aux: drm_dp_aux_unregister(&dp->aux); -err_disable_pm_runtime: - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } return ret; } @@ -1744,19 +1670,9 @@ void analogix_dp_unbind(struct analogix_dp_device *dp) analogix_dp_bridge_disable(dp->bridge); dp->connector.funcs->destroy(&dp->connector); - if (dp->plat_data->panel) { - if (drm_panel_unprepare(dp->plat_data->panel)) - DRM_ERROR("failed to turnoff the panel\n"); - } + drm_panel_unprepare(dp->plat_data->panel); drm_dp_aux_unregister(&dp->aux); - - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } } EXPORT_SYMBOL_GPL(analogix_dp_unbind); @@ -1782,6 +1698,20 @@ int analogix_dp_stop_crc(struct drm_connector *connector) } EXPORT_SYMBOL_GPL(analogix_dp_stop_crc); +struct analogix_dp_plat_data *analogix_dp_aux_to_plat_data(struct drm_dp_aux *aux) +{ + struct analogix_dp_device *dp = to_dp(aux); + + return dp->plat_data; +} +EXPORT_SYMBOL_GPL(analogix_dp_aux_to_plat_data); + +struct drm_dp_aux *analogix_dp_get_aux(struct analogix_dp_device *dp) +{ + return &dp->aux; +} +EXPORT_SYMBOL_GPL(analogix_dp_get_aux); + MODULE_AUTHOR("Jingoo Han "); MODULE_DESCRIPTION("Analogix DP Core Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h index 774d11574b09..2b54120ba4a3 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h @@ -169,9 +169,6 @@ struct analogix_dp_device { bool fast_train_enable; bool psr_supported; - struct mutex panel_lock; - bool panel_is_modeset; - struct analogix_dp_plat_data *plat_data; }; diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 3afc73c858c4..38fd8d5014d2 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -513,10 +514,24 @@ void analogix_dp_enable_sw_function(struct analogix_dp_device *dp) void analogix_dp_set_link_bandwidth(struct analogix_dp_device *dp, u32 bwtype) { u32 reg; + int ret; reg = bwtype; if ((bwtype == DP_LINK_BW_2_7) || (bwtype == DP_LINK_BW_1_62)) writel(reg, dp->reg_base + ANALOGIX_DP_LINK_BW_SET); + + if (dp->phy) { + union phy_configure_opts phy_cfg = {0}; + + phy_cfg.dp.link_rate = + drm_dp_bw_code_to_link_rate(dp->link_train.link_rate) / 100; + phy_cfg.dp.set_rate = true; + ret = phy_configure(dp->phy, &phy_cfg); + if (ret && ret != -EOPNOTSUPP) { + dev_err(dp->dev, "%s: phy_configure() failed: %d\n", __func__, ret); + return; + } + } } void analogix_dp_get_link_bandwidth(struct analogix_dp_device *dp, u32 *bwtype) @@ -530,9 +545,22 @@ void analogix_dp_get_link_bandwidth(struct analogix_dp_device *dp, u32 *bwtype) void analogix_dp_set_lane_count(struct analogix_dp_device *dp, u32 count) { u32 reg; + int ret; reg = count; writel(reg, dp->reg_base + ANALOGIX_DP_LANE_COUNT_SET); + + if (dp->phy) { + union phy_configure_opts phy_cfg = {0}; + + phy_cfg.dp.lanes = dp->link_train.lane_count; + phy_cfg.dp.set_lanes = true; + ret = phy_configure(dp->phy, &phy_cfg); + if (ret && ret != -EOPNOTSUPP) { + dev_err(dp->dev, "%s: phy_configure() failed: %d\n", __func__, ret); + return; + } + } } void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count) @@ -546,10 +574,34 @@ void analogix_dp_get_lane_count(struct analogix_dp_device *dp, u32 *count) void analogix_dp_set_lane_link_training(struct analogix_dp_device *dp) { u8 lane; + int ret; for (lane = 0; lane < dp->link_train.lane_count; lane++) writel(dp->link_train.training_lane[lane], dp->reg_base + ANALOGIX_DP_LN0_LINK_TRAINING_CTL + 4 * lane); + + if (dp->phy) { + union phy_configure_opts phy_cfg = {0}; + + for (lane = 0; lane < dp->link_train.lane_count; lane++) { + u8 training_lane = dp->link_train.training_lane[lane]; + u8 vs, pe; + + vs = (training_lane & DP_TRAIN_VOLTAGE_SWING_MASK) >> + DP_TRAIN_VOLTAGE_SWING_SHIFT; + pe = (training_lane & DP_TRAIN_PRE_EMPHASIS_MASK) >> + DP_TRAIN_PRE_EMPHASIS_SHIFT; + phy_cfg.dp.voltage[lane] = vs; + phy_cfg.dp.pre[lane] = pe; + } + + phy_cfg.dp.set_voltages = true; + ret = phy_configure(dp->phy, &phy_cfg); + if (ret && ret != -EOPNOTSUPP) { + dev_err(dp->dev, "%s: phy_configure() failed: %d\n", __func__, ret); + return; + } + } } u32 analogix_dp_get_lane_link_training(struct analogix_dp_device *dp, u8 lane) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 0b97b66de577..8a9079c2ed5c 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1257,10 +1257,10 @@ static void anx7625_power_on(struct anx7625_data *ctx) usleep_range(11000, 12000); /* Power on pin enable */ - gpiod_set_value(ctx->pdata.gpio_p_on, 1); + gpiod_set_value_cansleep(ctx->pdata.gpio_p_on, 1); usleep_range(10000, 11000); /* Power reset pin enable */ - gpiod_set_value(ctx->pdata.gpio_reset, 1); + gpiod_set_value_cansleep(ctx->pdata.gpio_reset, 1); usleep_range(10000, 11000); DRM_DEV_DEBUG_DRIVER(dev, "power on !\n"); @@ -1280,9 +1280,9 @@ static void anx7625_power_standby(struct anx7625_data *ctx) return; } - gpiod_set_value(ctx->pdata.gpio_reset, 0); + gpiod_set_value_cansleep(ctx->pdata.gpio_reset, 0); usleep_range(1000, 1100); - gpiod_set_value(ctx->pdata.gpio_p_on, 0); + gpiod_set_value_cansleep(ctx->pdata.gpio_p_on, 0); usleep_range(1000, 1100); ret = regulator_bulk_disable(ARRAY_SIZE(ctx->pdata.supplies), @@ -1814,9 +1814,6 @@ static enum drm_connector_status anx7625_sink_detect(struct anx7625_data *ctx) DRM_DEV_DEBUG_DRIVER(dev, "sink detect\n"); - if (ctx->pdata.panel_bridge) - return connector_status_connected; - return ctx->hpd_status ? connector_status_connected : connector_status_disconnected; } @@ -2141,6 +2138,7 @@ static void hdcp_check_work_func(struct work_struct *work) } static int anx7625_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct anx7625_data *ctx = bridge_to_anx7625(bridge); @@ -2159,7 +2157,7 @@ static int anx7625_bridge_attach(struct drm_bridge *bridge, } if (ctx->pdata.panel_bridge) { - err = drm_bridge_attach(bridge->encoder, + err = drm_bridge_attach(encoder, ctx->pdata.panel_bridge, &ctx->bridge, flags); if (err) @@ -2474,6 +2472,22 @@ static const struct drm_edid *anx7625_bridge_edid_read(struct drm_bridge *bridge return anx7625_edid_read(ctx); } +static void anx7625_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct anx7625_data *ctx = bridge_to_anx7625(bridge); + struct device *dev = ctx->dev; + + pm_runtime_get_sync(dev); +} + +static void anx7625_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct anx7625_data *ctx = bridge_to_anx7625(bridge); + struct device *dev = ctx->dev; + + pm_runtime_put_sync(dev); +} + static const struct drm_bridge_funcs anx7625_bridge_funcs = { .attach = anx7625_bridge_attach, .detach = anx7625_bridge_detach, @@ -2487,6 +2501,8 @@ static const struct drm_bridge_funcs anx7625_bridge_funcs = { .atomic_reset = drm_atomic_helper_bridge_reset, .detect = anx7625_bridge_detect, .edid_read = anx7625_bridge_edid_read, + .hpd_enable = anx7625_bridge_hpd_enable, + .hpd_disable = anx7625_bridge_hpd_disable, }; static int anx7625_register_i2c_dummy_clients(struct anx7625_data *ctx, @@ -2568,12 +2584,6 @@ static const struct dev_pm_ops anx7625_pm_ops = { anx7625_runtime_pm_resume, NULL) }; -static void anx7625_runtime_disable(void *data) -{ - pm_runtime_dont_use_autosuspend(data); - pm_runtime_disable(data); -} - static int anx7625_link_bridge(struct drm_dp_aux *aux) { struct anx7625_data *platform = container_of(aux, struct anx7625_data, aux); @@ -2590,9 +2600,8 @@ static int anx7625_link_bridge(struct drm_dp_aux *aux) platform->bridge.of_node = dev->of_node; if (!anx7625_of_panel_on_aux_bus(dev)) platform->bridge.ops |= DRM_BRIDGE_OP_EDID; - if (!platform->pdata.panel_bridge) - platform->bridge.ops |= DRM_BRIDGE_OP_HPD | - DRM_BRIDGE_OP_DETECT; + if (!platform->pdata.panel_bridge || !anx7625_of_panel_on_aux_bus(dev)) + platform->bridge.ops |= DRM_BRIDGE_OP_HPD | DRM_BRIDGE_OP_DETECT; platform->bridge.type = platform->pdata.panel_bridge ? DRM_MODE_CONNECTOR_eDP : DRM_MODE_CONNECTOR_DisplayPort; @@ -2707,11 +2716,10 @@ static int anx7625_i2c_probe(struct i2c_client *client) goto free_wq; } - pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); pm_suspend_ignore_children(dev, true); - ret = devm_add_action_or_reset(dev, anx7625_runtime_disable, dev); + ret = devm_pm_runtime_enable(dev); if (ret) goto free_wq; @@ -2771,7 +2779,6 @@ static void anx7625_i2c_remove(struct i2c_client *client) if (platform->hdcp_workqueue) { cancel_delayed_work(&platform->hdcp_work); - flush_workqueue(platform->hdcp_workqueue); destroy_workqueue(platform->hdcp_workqueue); } diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c index 015983c015e5..c179b86d208f 100644 --- a/drivers/gpu/drm/bridge/aux-bridge.c +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -86,6 +86,7 @@ struct drm_aux_bridge_data { }; static int drm_aux_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct drm_aux_bridge_data *data; @@ -95,7 +96,7 @@ static int drm_aux_bridge_attach(struct drm_bridge *bridge, data = container_of(bridge, struct drm_aux_bridge_data, bridge); - return drm_bridge_attach(bridge->encoder, data->next_bridge, bridge, + return drm_bridge_attach(encoder, data->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); } diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 48f297c78ee6..b3f588b71a7d 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -156,6 +156,7 @@ void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status sta EXPORT_SYMBOL_GPL(drm_aux_hpd_bridge_notify); static int drm_aux_hpd_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 0 : -EINVAL; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c index c7a0247e06ad..b022dd6e6b6e 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c @@ -425,6 +425,17 @@ #define DSI_NULL_FRAME_OVERHEAD 6 #define DSI_EOT_PKT_SIZE 4 +struct cdns_dsi_bridge_state { + struct drm_bridge_state base; + struct cdns_dsi_cfg dsi_cfg; +}; + +static inline struct cdns_dsi_bridge_state * +to_cdns_dsi_bridge_state(struct drm_bridge_state *bridge_state) +{ + return container_of(bridge_state, struct cdns_dsi_bridge_state, base); +} + static inline struct cdns_dsi *input_to_dsi(struct cdns_dsi_input *input) { return container_of(input, struct cdns_dsi, input); @@ -568,15 +579,18 @@ static int cdns_dsi_check_conf(struct cdns_dsi *dsi, struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; unsigned long dsi_hss_hsa_hse_hbp; unsigned int nlanes = output->dev->lanes; + int mode_clock = (mode_valid_check ? mode->clock : mode->crtc_clock); int ret; ret = cdns_dsi_mode2cfg(dsi, mode, dsi_cfg, mode_valid_check); if (ret) return ret; - phy_mipi_dphy_get_default_config(mode->crtc_clock * 1000, - mipi_dsi_pixel_format_to_bpp(output->dev->format), - nlanes, phy_cfg); + ret = phy_mipi_dphy_get_default_config(mode_clock * 1000, + mipi_dsi_pixel_format_to_bpp(output->dev->format), + nlanes, phy_cfg); + if (ret) + return ret; ret = cdns_dsi_adjust_phy_config(dsi, dsi_cfg, phy_cfg, mode, mode_valid_check); if (ret) @@ -605,6 +619,7 @@ static int cdns_dsi_check_conf(struct cdns_dsi *dsi, } static int cdns_dsi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); @@ -617,7 +632,7 @@ static int cdns_dsi_bridge_attach(struct drm_bridge *bridge, return -ENOTSUPP; } - return drm_bridge_attach(bridge->encoder, output->bridge, bridge, + return drm_bridge_attach(encoder, output->bridge, bridge, flags); } @@ -655,7 +670,8 @@ cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static void cdns_dsi_bridge_disable(struct drm_bridge *bridge) +static void cdns_dsi_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); @@ -675,11 +691,17 @@ static void cdns_dsi_bridge_disable(struct drm_bridge *bridge) pm_runtime_put(dsi->base.dev); } -static void cdns_dsi_bridge_post_disable(struct drm_bridge *bridge) +static void cdns_dsi_bridge_atomic_post_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); + dsi->phy_initialized = false; + dsi->link_initialized = false; + phy_power_off(dsi->dphy); + phy_exit(dsi->dphy); + pm_runtime_put(dsi->base.dev); } @@ -752,32 +774,59 @@ static void cdns_dsi_init_link(struct cdns_dsi *dsi) dsi->link_initialized = true; } -static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) +static void cdns_dsi_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); struct cdns_dsi_output *output = &dsi->output; + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + struct cdns_dsi_bridge_state *dsi_state; + struct drm_bridge_state *new_bridge_state; struct drm_display_mode *mode; struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; + struct drm_connector *connector; unsigned long tx_byte_period; struct cdns_dsi_cfg dsi_cfg; - u32 tmp, reg_wakeup, div; + u32 tmp, reg_wakeup, div, status; int nlanes; if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0)) return; + new_bridge_state = drm_atomic_get_new_bridge_state(state, bridge); + if (WARN_ON(!new_bridge_state)) + return; + + dsi_state = to_cdns_dsi_bridge_state(new_bridge_state); + dsi_cfg = dsi_state->dsi_cfg; + if (dsi->platform_ops && dsi->platform_ops->enable) dsi->platform_ops->enable(dsi); - mode = &bridge->encoder->crtc->state->adjusted_mode; + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + conn_state = drm_atomic_get_new_connector_state(state, connector); + crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + mode = &crtc_state->adjusted_mode; nlanes = output->dev->lanes; - WARN_ON_ONCE(cdns_dsi_check_conf(dsi, mode, &dsi_cfg, false)); - cdns_dsi_hs_init(dsi); cdns_dsi_init_link(dsi); + /* + * Now that the DSI Link and DSI Phy are initialized, + * wait for the CLK and Data Lanes to be ready. + */ + tmp = CLK_LANE_RDY; + for (int i = 0; i < nlanes; i++) + tmp |= DATA_LANE_RDY(i); + + if (readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status, + (tmp == (status & tmp)), 100, 500000)) + dev_err(dsi->base.dev, + "Timed Out: DSI-DPhy Clock and Data Lanes not ready.\n"); + writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), dsi->regs + VID_HSIZE1); writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact), @@ -892,7 +941,8 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) writel(tmp, dsi->regs + MCTL_MAIN_EN); } -static void cdns_dsi_bridge_pre_enable(struct drm_bridge *bridge) +static void cdns_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); @@ -904,13 +954,109 @@ static void cdns_dsi_bridge_pre_enable(struct drm_bridge *bridge) cdns_dsi_hs_init(dsi); } +static u32 *cdns_dsi_bridge_get_input_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + u32 *input_fmts; + + *num_input_fmts = 0; + + input_fmts = kzalloc(sizeof(*input_fmts), GFP_KERNEL); + if (!input_fmts) + return NULL; + + input_fmts[0] = drm_mipi_dsi_get_input_bus_fmt(output->dev->format); + if (!input_fmts[0]) + return NULL; + + *num_input_fmts = 1; + + return input_fmts; +} + +static int cdns_dsi_bridge_atomic_check(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_bridge_state *dsi_state = to_cdns_dsi_bridge_state(bridge_state); + const struct drm_display_mode *mode = &crtc_state->mode; + struct cdns_dsi_cfg *dsi_cfg = &dsi_state->dsi_cfg; + + return cdns_dsi_check_conf(dsi, mode, dsi_cfg, false); +} + +static struct drm_bridge_state * +cdns_dsi_bridge_atomic_duplicate_state(struct drm_bridge *bridge) +{ + struct cdns_dsi_bridge_state *dsi_state, *old_dsi_state; + struct drm_bridge_state *bridge_state; + + if (WARN_ON(!bridge->base.state)) + return NULL; + + bridge_state = drm_priv_to_bridge_state(bridge->base.state); + old_dsi_state = to_cdns_dsi_bridge_state(bridge_state); + + dsi_state = kzalloc(sizeof(*dsi_state), GFP_KERNEL); + if (!dsi_state) + return NULL; + + __drm_atomic_helper_bridge_duplicate_state(bridge, &dsi_state->base); + + memcpy(&dsi_state->dsi_cfg, &old_dsi_state->dsi_cfg, + sizeof(dsi_state->dsi_cfg)); + + return &dsi_state->base; +} + +static void +cdns_dsi_bridge_atomic_destroy_state(struct drm_bridge *bridge, + struct drm_bridge_state *state) +{ + struct cdns_dsi_bridge_state *dsi_state; + + dsi_state = to_cdns_dsi_bridge_state(state); + + kfree(dsi_state); +} + +static struct drm_bridge_state * +cdns_dsi_bridge_atomic_reset(struct drm_bridge *bridge) +{ + struct cdns_dsi_bridge_state *dsi_state; + + dsi_state = kzalloc(sizeof(*dsi_state), GFP_KERNEL); + if (!dsi_state) + return NULL; + + memset(dsi_state, 0, sizeof(*dsi_state)); + dsi_state->base.bridge = bridge; + + return &dsi_state->base; +} + static const struct drm_bridge_funcs cdns_dsi_bridge_funcs = { .attach = cdns_dsi_bridge_attach, .mode_valid = cdns_dsi_bridge_mode_valid, - .disable = cdns_dsi_bridge_disable, - .pre_enable = cdns_dsi_bridge_pre_enable, - .enable = cdns_dsi_bridge_enable, - .post_disable = cdns_dsi_bridge_post_disable, + .atomic_disable = cdns_dsi_bridge_atomic_disable, + .atomic_pre_enable = cdns_dsi_bridge_atomic_pre_enable, + .atomic_enable = cdns_dsi_bridge_atomic_enable, + .atomic_post_disable = cdns_dsi_bridge_atomic_post_disable, + .atomic_check = cdns_dsi_bridge_atomic_check, + .atomic_reset = cdns_dsi_bridge_atomic_reset, + .atomic_duplicate_state = cdns_dsi_bridge_atomic_duplicate_state, + .atomic_destroy_state = cdns_dsi_bridge_atomic_destroy_state, + .atomic_get_input_bus_fmts = cdns_dsi_bridge_get_input_bus_fmts, }; static int cdns_dsi_attach(struct mipi_dsi_host *host, @@ -920,8 +1066,6 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host, struct cdns_dsi_output *output = &dsi->output; struct cdns_dsi_input *input = &dsi->input; struct drm_bridge *bridge; - struct drm_panel *panel; - struct device_node *np; int ret; /* @@ -939,26 +1083,10 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host, /* * The host <-> device link might be described using an OF-graph * representation, in this case we extract the device of_node from - * this representation, otherwise we use dsidev->dev.of_node which - * should have been filled by the core. + * this representation. */ - np = of_graph_get_remote_node(dsi->base.dev->of_node, DSI_OUTPUT_PORT, - dev->channel); - if (!np) - np = of_node_get(dev->dev.of_node); - - panel = of_drm_find_panel(np); - if (!IS_ERR(panel)) { - bridge = drm_panel_bridge_add_typed(panel, - DRM_MODE_CONNECTOR_DSI); - } else { - bridge = of_drm_find_bridge(dev->dev.of_node); - if (!bridge) - bridge = ERR_PTR(-EINVAL); - } - - of_node_put(np); - + bridge = devm_drm_of_get_bridge(dsi->base.dev, dsi->base.dev->of_node, + DSI_OUTPUT_PORT, dev->channel); if (IS_ERR(bridge)) { ret = PTR_ERR(bridge); dev_err(host->dev, "failed to add DSI device %s (err = %d)", @@ -968,7 +1096,6 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host, output->dev = dev; output->bridge = bridge; - output->panel = panel; /* * The DSI output has been properly configured, we can now safely @@ -984,12 +1111,9 @@ static int cdns_dsi_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *dev) { struct cdns_dsi *dsi = to_cdns_dsi(host); - struct cdns_dsi_output *output = &dsi->output; struct cdns_dsi_input *input = &dsi->input; drm_bridge_remove(&input->bridge); - if (output->panel) - drm_panel_bridge_remove(output->bridge); return 0; } @@ -1152,7 +1276,6 @@ static int __maybe_unused cdns_dsi_suspend(struct device *dev) clk_disable_unprepare(dsi->dsi_sys_clk); clk_disable_unprepare(dsi->dsi_p_clk); reset_control_assert(dsi->dsi_p_rst); - dsi->link_initialized = false; return 0; } diff --git a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h index ca7ea2da635c..5db5dbbbcaad 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h +++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -21,7 +20,6 @@ struct reset_control; struct cdns_dsi_output { struct mipi_dsi_device *dev; - struct drm_panel *panel; struct drm_bridge *bridge; union phy_configure_opts phy_opts; }; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index 81fad14c2cd5..b431e7efd1f0 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -545,76 +545,6 @@ out: return ret; } -/** - * cdns_mhdp_link_power_up() - power up a DisplayPort link - * @aux: DisplayPort AUX channel - * @link: pointer to a structure containing the link configuration - * - * Returns 0 on success or a negative error code on failure. - */ -static -int cdns_mhdp_link_power_up(struct drm_dp_aux *aux, struct cdns_mhdp_link *link) -{ - u8 value; - int err; - - /* DP_SET_POWER register is only available on DPCD v1.1 and later */ - if (link->revision < 0x11) - return 0; - - err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); - if (err < 0) - return err; - - value &= ~DP_SET_POWER_MASK; - value |= DP_SET_POWER_D0; - - err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); - if (err < 0) - return err; - - /* - * According to the DP 1.1 specification, a "Sink Device must exit the - * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink - * Control Field" (register 0x600). - */ - usleep_range(1000, 2000); - - return 0; -} - -/** - * cdns_mhdp_link_power_down() - power down a DisplayPort link - * @aux: DisplayPort AUX channel - * @link: pointer to a structure containing the link configuration - * - * Returns 0 on success or a negative error code on failure. - */ -static -int cdns_mhdp_link_power_down(struct drm_dp_aux *aux, - struct cdns_mhdp_link *link) -{ - u8 value; - int err; - - /* DP_SET_POWER register is only available on DPCD v1.1 and later */ - if (link->revision < 0x11) - return 0; - - err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); - if (err < 0) - return err; - - value &= ~DP_SET_POWER_MASK; - value |= DP_SET_POWER_D3; - - err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); - if (err < 0) - return err; - - return 0; -} - /** * cdns_mhdp_link_configure() - configure a DisplayPort link * @aux: DisplayPort AUX channel @@ -1453,7 +1383,7 @@ static int cdns_mhdp_link_up(struct cdns_mhdp_device *mhdp) mhdp->link.capabilities |= DP_LINK_CAP_ENHANCED_FRAMING; dev_dbg(mhdp->dev, "Set sink device power state via DPCD\n"); - cdns_mhdp_link_power_up(&mhdp->aux, &mhdp->link); + drm_dp_link_power_up(&mhdp->aux, mhdp->link.revision); cdns_mhdp_fill_sink_caps(mhdp, dpcd); @@ -1500,7 +1430,7 @@ static void cdns_mhdp_link_down(struct cdns_mhdp_device *mhdp) WARN_ON(!mutex_is_locked(&mhdp->link_mutex)); if (mhdp->plugged) - cdns_mhdp_link_power_down(&mhdp->aux, &mhdp->link); + drm_dp_link_power_down(&mhdp->aux, mhdp->link.revision); mhdp->link_up = false; } @@ -1726,6 +1656,7 @@ static int cdns_mhdp_connector_init(struct cdns_mhdp_device *mhdp) } static int cdns_mhdp_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct cdns_mhdp_device *mhdp = bridge_to_mhdp(bridge); @@ -2305,7 +2236,7 @@ static int cdns_mhdp_update_link_status(struct cdns_mhdp_device *mhdp) * If everything looks fine, just return, as we don't handle * DP IRQs. */ - if (ret > 0 && + if (!ret && drm_dp_channel_eq_ok(status, mhdp->link.num_lanes) && drm_dp_clock_recovery_ok(status, mhdp->link.num_lanes)) goto out; diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 81f7c701961f..634c5b030667 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -580,11 +580,13 @@ static int chipone_dsi_host_attach(struct chipone *icn) return ret; } -static int chipone_attach(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags) +static int chipone_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) { struct chipone *icn = bridge_to_chipone(bridge); - return drm_bridge_attach(bridge->encoder, icn->panel_bridge, bridge, flags); + return drm_bridge_attach(encoder, icn->panel_bridge, bridge, flags); } #define MAX_INPUT_SEL_FORMATS 1 diff --git a/drivers/gpu/drm/bridge/chrontel-ch7033.c b/drivers/gpu/drm/bridge/chrontel-ch7033.c index da17f0978a79..210c45c1efd4 100644 --- a/drivers/gpu/drm/bridge/chrontel-ch7033.c +++ b/drivers/gpu/drm/bridge/chrontel-ch7033.c @@ -268,13 +268,14 @@ static void ch7033_hpd_event(void *arg, enum drm_connector_status status) } static int ch7033_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ch7033_priv *priv = bridge_to_ch7033_priv(bridge); struct drm_connector *connector = &priv->connector; int ret; - ret = drm_bridge_attach(bridge->encoder, priv->next_bridge, bridge, + ret = drm_bridge_attach(encoder, priv->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) return ret; @@ -305,7 +306,7 @@ static int ch7033_bridge_attach(struct drm_bridge *bridge, return ret; } - return drm_connector_attach_encoder(&priv->connector, bridge->encoder); + return drm_connector_attach_encoder(&priv->connector, encoder); } static void ch7033_bridge_detach(struct drm_bridge *bridge) diff --git a/drivers/gpu/drm/bridge/display-connector.c b/drivers/gpu/drm/bridge/display-connector.c index 72bc508d4e6e..badd2c7f91a1 100644 --- a/drivers/gpu/drm/bridge/display-connector.c +++ b/drivers/gpu/drm/bridge/display-connector.c @@ -34,6 +34,7 @@ to_display_connector(struct drm_bridge *bridge) } static int display_connector_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 0 : -EINVAL; @@ -209,9 +210,10 @@ static int display_connector_probe(struct platform_device *pdev) const char *label = NULL; int ret; - conn = devm_kzalloc(&pdev->dev, sizeof(*conn), GFP_KERNEL); - if (!conn) - return -ENOMEM; + conn = devm_drm_bridge_alloc(&pdev->dev, struct display_connector, bridge, + &display_connector_bridge_funcs); + if (IS_ERR(conn)) + return PTR_ERR(conn); platform_set_drvdata(pdev, conn); @@ -361,7 +363,6 @@ static int display_connector_probe(struct platform_device *pdev) } } - conn->bridge.funcs = &display_connector_bridge_funcs; conn->bridge.of_node = pdev->dev.of_node; if (conn->bridge.ddc) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 26ae1ab5237f..2cb6dfc7a6d3 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -113,11 +113,12 @@ static unsigned long fsl_ldb_link_frequency(struct fsl_ldb *fsl_ldb, int clock) } static int fsl_ldb_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge); - return drm_bridge_attach(bridge->encoder, fsl_ldb->panel_bridge, + return drm_bridge_attach(encoder, fsl_ldb->panel_bridge, bridge, flags); } @@ -180,9 +181,9 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, configured_link_freq = clk_get_rate(fsl_ldb->clk); if (configured_link_freq != requested_link_freq) - dev_warn(fsl_ldb->dev, "Configured LDB clock (%lu Hz) does not match requested LVDS clock: %lu Hz\n", - configured_link_freq, - requested_link_freq); + dev_warn(fsl_ldb->dev, + "Configured %pC clock (%lu Hz) does not match requested LVDS clock: %lu Hz\n", + fsl_ldb->clk, configured_link_freq, requested_link_freq); clk_prepare_enable(fsl_ldb->clk); diff --git a/drivers/gpu/drm/bridge/imx/imx-ldb-helper.c b/drivers/gpu/drm/bridge/imx/imx-ldb-helper.c index 9b5bebbe357d..6149ba141a38 100644 --- a/drivers/gpu/drm/bridge/imx/imx-ldb-helper.c +++ b/drivers/gpu/drm/bridge/imx/imx-ldb-helper.c @@ -104,7 +104,7 @@ void ldb_bridge_disable_helper(struct drm_bridge *bridge) } EXPORT_SYMBOL_GPL(ldb_bridge_disable_helper); -int ldb_bridge_attach_helper(struct drm_bridge *bridge, +int ldb_bridge_attach_helper(struct drm_bridge *bridge, struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ldb_channel *ldb_ch = bridge->driver_private; @@ -116,9 +116,8 @@ int ldb_bridge_attach_helper(struct drm_bridge *bridge, return -EINVAL; } - return drm_bridge_attach(bridge->encoder, - ldb_ch->next_bridge, bridge, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); + return drm_bridge_attach(encoder, ldb_ch->next_bridge, bridge, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); } EXPORT_SYMBOL_GPL(ldb_bridge_attach_helper); @@ -191,8 +190,7 @@ int ldb_find_next_bridge_helper(struct ldb *ldb) } EXPORT_SYMBOL_GPL(ldb_find_next_bridge_helper); -void ldb_add_bridge_helper(struct ldb *ldb, - const struct drm_bridge_funcs *bridge_funcs) +void ldb_add_bridge_helper(struct ldb *ldb) { struct ldb_channel *ldb_ch; int i; @@ -204,7 +202,6 @@ void ldb_add_bridge_helper(struct ldb *ldb, continue; ldb_ch->bridge.driver_private = ldb_ch; - ldb_ch->bridge.funcs = bridge_funcs; ldb_ch->bridge.of_node = ldb_ch->np; drm_bridge_add(&ldb_ch->bridge); diff --git a/drivers/gpu/drm/bridge/imx/imx-ldb-helper.h b/drivers/gpu/drm/bridge/imx/imx-ldb-helper.h index a0a5cde27fbc..de187e326999 100644 --- a/drivers/gpu/drm/bridge/imx/imx-ldb-helper.h +++ b/drivers/gpu/drm/bridge/imx/imx-ldb-helper.h @@ -81,15 +81,14 @@ void ldb_bridge_enable_helper(struct drm_bridge *bridge); void ldb_bridge_disable_helper(struct drm_bridge *bridge); -int ldb_bridge_attach_helper(struct drm_bridge *bridge, +int ldb_bridge_attach_helper(struct drm_bridge *bridge, struct drm_encoder *encoder, enum drm_bridge_attach_flags flags); int ldb_init_helper(struct ldb *ldb); int ldb_find_next_bridge_helper(struct ldb *ldb); -void ldb_add_bridge_helper(struct ldb *ldb, - const struct drm_bridge_funcs *bridge_funcs); +void ldb_add_bridge_helper(struct ldb *ldb); void ldb_remove_bridge_helper(struct ldb *ldb); diff --git a/drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c b/drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c index 3ebf0b9866de..f072c6ed39ef 100644 --- a/drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c +++ b/drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c @@ -23,7 +23,8 @@ struct imx_legacy_bridge { #define to_imx_legacy_bridge(bridge) container_of(bridge, struct imx_legacy_bridge, base) static int imx_legacy_bridge_attach(struct drm_bridge *bridge, - enum drm_bridge_attach_flags flags) + struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) { if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) return -EINVAL; @@ -76,9 +77,9 @@ struct drm_bridge *devm_imx_drm_legacy_bridge(struct device *dev, imx_bridge->base.ops = DRM_BRIDGE_OP_MODES; imx_bridge->base.type = type; - ret = devm_drm_bridge_add(dev, &imx_bridge->base); - if (ret) - return ERR_PTR(ret); + ret = devm_drm_bridge_add(dev, &imx_bridge->base); + if (ret) + return ERR_PTR(ret); return &imx_bridge->base; } diff --git a/drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c b/drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c index a17433a7c755..8a4fd7d77a8d 100644 --- a/drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c +++ b/drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c @@ -40,11 +40,12 @@ to_imx8mp_hdmi_pvi(struct drm_bridge *bridge) } static int imx8mp_hdmi_pvi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct imx8mp_hdmi_pvi *pvi = to_imx8mp_hdmi_pvi(bridge); - return drm_bridge_attach(bridge->encoder, pvi->next_bridge, + return drm_bridge_attach(encoder, pvi->next_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/imx/imx8qm-ldb.c b/drivers/gpu/drm/bridge/imx/imx8qm-ldb.c index 524aac751359..47aa65938e6a 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qm-ldb.c +++ b/drivers/gpu/drm/bridge/imx/imx8qm-ldb.c @@ -47,7 +47,7 @@ struct imx8qm_ldb_channel { struct imx8qm_ldb { struct ldb base; struct device *dev; - struct imx8qm_ldb_channel channel[MAX_LDB_CHAN_NUM]; + struct imx8qm_ldb_channel *channel[MAX_LDB_CHAN_NUM]; struct clk *clk_pixel; struct clk *clk_bypass; int active_chno; @@ -107,7 +107,7 @@ static int imx8qm_ldb_bridge_atomic_check(struct drm_bridge *bridge, if (is_split) { imx8qm_ldb_ch = - &imx8qm_ldb->channel[imx8qm_ldb->active_chno ^ 1]; + imx8qm_ldb->channel[imx8qm_ldb->active_chno ^ 1]; imx8qm_ldb_set_phy_cfg(imx8qm_ldb, di_clk, is_split, true, phy_cfg); ret = phy_validate(imx8qm_ldb_ch->phy, PHY_MODE_LVDS, 0, &opts); @@ -158,7 +158,7 @@ imx8qm_ldb_bridge_mode_set(struct drm_bridge *bridge, if (is_split) { imx8qm_ldb_ch = - &imx8qm_ldb->channel[imx8qm_ldb->active_chno ^ 1]; + imx8qm_ldb->channel[imx8qm_ldb->active_chno ^ 1]; imx8qm_ldb_set_phy_cfg(imx8qm_ldb, di_clk, is_split, true, phy_cfg); ret = phy_configure(imx8qm_ldb_ch->phy, &opts); @@ -226,13 +226,13 @@ static void imx8qm_ldb_bridge_atomic_enable(struct drm_bridge *bridge, } if (is_split) { - ret = phy_power_on(imx8qm_ldb->channel[0].phy); + ret = phy_power_on(imx8qm_ldb->channel[0]->phy); if (ret) DRM_DEV_ERROR(dev, "failed to power on channel0 PHY: %d\n", ret); - ret = phy_power_on(imx8qm_ldb->channel[1].phy); + ret = phy_power_on(imx8qm_ldb->channel[1]->phy); if (ret) DRM_DEV_ERROR(dev, "failed to power on channel1 PHY: %d\n", @@ -261,12 +261,12 @@ static void imx8qm_ldb_bridge_atomic_disable(struct drm_bridge *bridge, ldb_bridge_disable_helper(bridge); if (is_split) { - ret = phy_power_off(imx8qm_ldb->channel[0].phy); + ret = phy_power_off(imx8qm_ldb->channel[0]->phy); if (ret) DRM_DEV_ERROR(dev, "failed to power off channel0 PHY: %d\n", ret); - ret = phy_power_off(imx8qm_ldb->channel[1].phy); + ret = phy_power_off(imx8qm_ldb->channel[1]->phy); if (ret) DRM_DEV_ERROR(dev, "failed to power off channel1 PHY: %d\n", @@ -412,7 +412,7 @@ static int imx8qm_ldb_get_phy(struct imx8qm_ldb *imx8qm_ldb) int i, ret; for (i = 0; i < MAX_LDB_CHAN_NUM; i++) { - imx8qm_ldb_ch = &imx8qm_ldb->channel[i]; + imx8qm_ldb_ch = imx8qm_ldb->channel[i]; ldb_ch = &imx8qm_ldb_ch->base; if (!ldb_ch->is_available) @@ -448,6 +448,14 @@ static int imx8qm_ldb_probe(struct platform_device *pdev) if (!imx8qm_ldb) return -ENOMEM; + for (i = 0; i < MAX_LDB_CHAN_NUM; i++) { + imx8qm_ldb->channel[i] = + devm_drm_bridge_alloc(dev, struct imx8qm_ldb_channel, base.bridge, + &imx8qm_ldb_bridge_funcs); + if (IS_ERR(imx8qm_ldb->channel[i])) + return PTR_ERR(imx8qm_ldb->channel[i]); + } + imx8qm_ldb->clk_pixel = devm_clk_get(dev, "pixel"); if (IS_ERR(imx8qm_ldb->clk_pixel)) { ret = PTR_ERR(imx8qm_ldb->clk_pixel); @@ -473,7 +481,7 @@ static int imx8qm_ldb_probe(struct platform_device *pdev) ldb->ctrl_reg = 0xe0; for (i = 0; i < MAX_LDB_CHAN_NUM; i++) - ldb->channel[i] = &imx8qm_ldb->channel[i].base; + ldb->channel[i] = &imx8qm_ldb->channel[i]->base; ret = ldb_init_helper(ldb); if (ret) @@ -499,12 +507,12 @@ static int imx8qm_ldb_probe(struct platform_device *pdev) } imx8qm_ldb->active_chno = 0; - imx8qm_ldb_ch = &imx8qm_ldb->channel[0]; + imx8qm_ldb_ch = imx8qm_ldb->channel[0]; ldb_ch = &imx8qm_ldb_ch->base; ldb_ch->link_type = pixel_order; } else { for (i = 0; i < MAX_LDB_CHAN_NUM; i++) { - imx8qm_ldb_ch = &imx8qm_ldb->channel[i]; + imx8qm_ldb_ch = imx8qm_ldb->channel[i]; ldb_ch = &imx8qm_ldb_ch->base; if (ldb_ch->is_available) { @@ -525,7 +533,7 @@ static int imx8qm_ldb_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imx8qm_ldb); pm_runtime_enable(dev); - ldb_add_bridge_helper(ldb, &imx8qm_ldb_bridge_funcs); + ldb_add_bridge_helper(ldb); return ret; } diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c b/drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c index 3cb484773ddf..5d272916e200 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c @@ -44,7 +44,7 @@ struct imx8qxp_ldb_channel { struct imx8qxp_ldb { struct ldb base; struct device *dev; - struct imx8qxp_ldb_channel channel[MAX_LDB_CHAN_NUM]; + struct imx8qxp_ldb_channel *channel[MAX_LDB_CHAN_NUM]; struct clk *clk_pixel; struct clk *clk_bypass; struct drm_bridge *companion; @@ -410,7 +410,7 @@ static const struct drm_bridge_funcs imx8qxp_ldb_bridge_funcs = { static int imx8qxp_ldb_set_di_id(struct imx8qxp_ldb *imx8qxp_ldb) { struct imx8qxp_ldb_channel *imx8qxp_ldb_ch = - &imx8qxp_ldb->channel[imx8qxp_ldb->active_chno]; + imx8qxp_ldb->channel[imx8qxp_ldb->active_chno]; struct ldb_channel *ldb_ch = &imx8qxp_ldb_ch->base; struct device_node *ep, *remote; struct device *dev = imx8qxp_ldb->dev; @@ -456,7 +456,7 @@ imx8qxp_ldb_check_chno_and_dual_link(struct ldb_channel *ldb_ch, int link) static int imx8qxp_ldb_parse_dt_companion(struct imx8qxp_ldb *imx8qxp_ldb) { struct imx8qxp_ldb_channel *imx8qxp_ldb_ch = - &imx8qxp_ldb->channel[imx8qxp_ldb->active_chno]; + imx8qxp_ldb->channel[imx8qxp_ldb->active_chno]; struct ldb_channel *ldb_ch = &imx8qxp_ldb_ch->base; struct ldb_channel *companion_ldb_ch; struct device_node *companion; @@ -586,6 +586,14 @@ static int imx8qxp_ldb_probe(struct platform_device *pdev) if (!imx8qxp_ldb) return -ENOMEM; + for (i = 0; i < MAX_LDB_CHAN_NUM; i++) { + imx8qxp_ldb->channel[i] = + devm_drm_bridge_alloc(dev, struct imx8qxp_ldb_channel, base.bridge, + &imx8qxp_ldb_bridge_funcs); + if (IS_ERR(imx8qxp_ldb->channel[i])) + return PTR_ERR(imx8qxp_ldb->channel[i]); + } + imx8qxp_ldb->clk_pixel = devm_clk_get(dev, "pixel"); if (IS_ERR(imx8qxp_ldb->clk_pixel)) { ret = PTR_ERR(imx8qxp_ldb->clk_pixel); @@ -611,7 +619,7 @@ static int imx8qxp_ldb_probe(struct platform_device *pdev) ldb->ctrl_reg = 0xe0; for (i = 0; i < MAX_LDB_CHAN_NUM; i++) - ldb->channel[i] = &imx8qxp_ldb->channel[i].base; + ldb->channel[i] = &imx8qxp_ldb->channel[i]->base; ret = ldb_init_helper(ldb); if (ret) @@ -627,7 +635,7 @@ static int imx8qxp_ldb_probe(struct platform_device *pdev) } for (i = 0; i < MAX_LDB_CHAN_NUM; i++) { - imx8qxp_ldb_ch = &imx8qxp_ldb->channel[i]; + imx8qxp_ldb_ch = imx8qxp_ldb->channel[i]; ldb_ch = &imx8qxp_ldb_ch->base; if (ldb_ch->is_available) { @@ -660,9 +668,9 @@ static int imx8qxp_ldb_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imx8qxp_ldb); pm_runtime_enable(dev); - ldb_add_bridge_helper(ldb, &imx8qxp_ldb_bridge_funcs); + ldb_add_bridge_helper(ldb); - return ret; + return 0; } static void imx8qxp_ldb_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c index 1d9529dc7f2a..1f6fd488e703 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c @@ -108,6 +108,7 @@ imx8qxp_pc_bridge_mode_valid(struct drm_bridge *bridge, } static int imx8qxp_pc_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct imx8qxp_pc_channel *ch = bridge->driver_private; @@ -119,7 +120,7 @@ static int imx8qxp_pc_bridge_attach(struct drm_bridge *bridge, return -EINVAL; } - return drm_bridge_attach(bridge->encoder, + return drm_bridge_attach(encoder, ch->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); } diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c index cd6818db0fd3..e092c9ea99b0 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c @@ -128,6 +128,7 @@ static void imx8qxp_pixel_link_set_mst_addr(struct imx8qxp_pixel_link *pl) } static int imx8qxp_pixel_link_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct imx8qxp_pixel_link *pl = bridge->driver_private; @@ -138,7 +139,7 @@ static int imx8qxp_pixel_link_bridge_attach(struct drm_bridge *bridge, return -EINVAL; } - return drm_bridge_attach(bridge->encoder, + return drm_bridge_attach(encoder, pl->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); } diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c index 49dd4f96d52c..da138ab51b3b 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c @@ -48,6 +48,7 @@ struct imx8qxp_pxl2dpi { #define bridge_to_p2d(b) container_of(b, struct imx8qxp_pxl2dpi, bridge) static int imx8qxp_pxl2dpi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct imx8qxp_pxl2dpi *p2d = bridge->driver_private; @@ -58,7 +59,7 @@ static int imx8qxp_pxl2dpi_bridge_attach(struct drm_bridge *bridge, return -EINVAL; } - return drm_bridge_attach(bridge->encoder, + return drm_bridge_attach(encoder, p2d->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); } diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index 21152a1c28f7..a3a63a977b0a 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -665,13 +665,14 @@ it6263_bridge_mode_valid(struct drm_bridge *bridge, } static int it6263_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct it6263 *it = bridge_to_it6263(bridge); struct drm_connector *connector; int ret; - ret = drm_bridge_attach(bridge->encoder, it->next_bridge, bridge, + ret = drm_bridge_attach(encoder, it->next_bridge, bridge, flags | DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) return ret; @@ -679,7 +680,7 @@ static int it6263_bridge_attach(struct drm_bridge *bridge, if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) return 0; - connector = drm_bridge_connector_init(bridge->dev, bridge->encoder); + connector = drm_bridge_connector_init(bridge->dev, encoder); if (IS_ERR(connector)) { ret = PTR_ERR(connector); dev_err(it->dev, "failed to initialize bridge connector: %d\n", @@ -687,7 +688,7 @@ static int it6263_bridge_attach(struct drm_bridge *bridge, return ret; } - drm_connector_attach_encoder(connector, bridge->encoder); + drm_connector_attach_encoder(connector, encoder); return 0; } diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 8a607558ac89..1383d1e21afe 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -771,40 +771,6 @@ static void it6505_calc_video_info(struct it6505 *it6505) DRM_MODE_ARG(&it6505->video_info)); } -static int it6505_drm_dp_link_set_power(struct drm_dp_aux *aux, - struct it6505_drm_dp_link *link, - u8 mode) -{ - u8 value; - int err; - - /* DP_SET_POWER register is only available on DPCD v1.1 and later */ - if (link->revision < DPCD_V_1_1) - return 0; - - err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); - if (err < 0) - return err; - - value &= ~DP_SET_POWER_MASK; - value |= mode; - - err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); - if (err < 0) - return err; - - if (mode == DP_SET_POWER_D0) { - /* - * According to the DP 1.1 specification, a "Sink Device must - * exit the power saving state within 1 ms" (Section 2.5.3.1, - * Table 5-52, "Sink Control Field" (register 0x600). - */ - usleep_range(1000, 2000); - } - - return 0; -} - static void it6505_clear_int(struct it6505 *it6505) { it6505_write(it6505, INT_STATUS_01, 0xFF); @@ -2578,8 +2544,7 @@ static void it6505_irq_hpd(struct it6505 *it6505) } it6505->auto_train_retry = AUTO_TRAIN_RETRY; - it6505_drm_dp_link_set_power(&it6505->aux, &it6505->link, - DP_SET_POWER_D0); + drm_dp_link_power_up(&it6505->aux, it6505->link.revision); dp_sink_count = it6505_dpcd_read(it6505, DP_SINK_COUNT); it6505->sink_count = DP_GET_SINK_COUNT(dp_sink_count); @@ -2910,8 +2875,7 @@ static enum drm_connector_status it6505_detect(struct it6505 *it6505) } if (it6505->hpd_state) { - it6505_drm_dp_link_set_power(&it6505->aux, &it6505->link, - DP_SET_POWER_D0); + drm_dp_link_power_up(&it6505->aux, it6505->link.revision); dp_sink_count = it6505_dpcd_read(it6505, DP_SINK_COUNT); it6505->sink_count = DP_GET_SINK_COUNT(dp_sink_count); DRM_DEV_DEBUG_DRIVER(dev, "it6505->sink_count:%d branch:%d", @@ -3124,6 +3088,7 @@ static inline struct it6505 *bridge_to_it6505(struct drm_bridge *bridge) } static int it6505_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct it6505 *it6505 = bridge_to_it6505(bridge); @@ -3233,8 +3198,7 @@ static void it6505_bridge_atomic_enable(struct drm_bridge *bridge, it6505_int_mask_enable(it6505); it6505_video_reset(it6505); - it6505_drm_dp_link_set_power(&it6505->aux, &it6505->link, - DP_SET_POWER_D0); + drm_dp_link_power_up(&it6505->aux, it6505->link.revision); } static void it6505_bridge_atomic_disable(struct drm_bridge *bridge, @@ -3246,8 +3210,7 @@ static void it6505_bridge_atomic_disable(struct drm_bridge *bridge, DRM_DEV_DEBUG_DRIVER(dev, "start"); if (it6505->powered) { - it6505_drm_dp_link_set_power(&it6505->aux, &it6505->link, - DP_SET_POWER_D3); + drm_dp_link_power_down(&it6505->aux, it6505->link.revision); it6505_video_disable(it6505); } } diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index b9f90f32145d..7b110ae53291 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -586,6 +586,7 @@ static bool it66121_is_hpd_detect(struct it66121_ctx *ctx) } static int it66121_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct it66121_ctx *ctx = container_of(bridge, struct it66121_ctx, bridge); @@ -594,7 +595,7 @@ static int it66121_bridge_attach(struct drm_bridge *bridge, if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) return -EINVAL; - ret = drm_bridge_attach(bridge->encoder, ctx->next_bridge, bridge, flags); + ret = drm_bridge_attach(encoder, ctx->next_bridge, bridge, flags); if (ret) return ret; diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 52da204f5740..3e49d855b364 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -543,12 +543,13 @@ exit: } static int lt8912_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct lt8912 *lt = bridge_to_lt8912(bridge); int ret; - ret = drm_bridge_attach(bridge->encoder, lt->hdmi_port, bridge, + ret = drm_bridge_attach(encoder, lt->hdmi_port, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) { dev_err(lt->dev, "Failed to attach next bridge (%d)\n", ret); diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 0fc5ea18fe6a..9b2dac9bd63c 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -99,11 +99,12 @@ static struct lt9211 *bridge_to_lt9211(struct drm_bridge *bridge) } static int lt9211_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct lt9211 *ctx = bridge_to_lt9211(bridge); - return drm_bridge_attach(bridge->encoder, ctx->panel_bridge, + return drm_bridge_attach(encoder, ctx->panel_bridge, &ctx->bridge, flags); } diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 026803034231..a35a8b8ca89c 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -740,11 +740,12 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611, } static int lt9611_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); - return drm_bridge_attach(bridge->encoder, lt9611->next_bridge, + return drm_bridge_attach(encoder, lt9611->next_bridge, bridge, flags); } @@ -1130,7 +1131,7 @@ static int lt9611_probe(struct i2c_client *client) lt9611->bridge.of_node = client->dev.of_node; lt9611->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD | DRM_BRIDGE_OP_MODES | - DRM_BRIDGE_OP_HDMI; + DRM_BRIDGE_OP_HDMI | DRM_BRIDGE_OP_HDMI_AUDIO; lt9611->bridge.type = DRM_MODE_CONNECTOR_HDMIA; lt9611->bridge.vendor = "Lontium"; lt9611->bridge.product = "LT9611"; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index f4c3ff1fdc69..766da2cb45a7 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -280,11 +280,12 @@ static struct mipi_dsi_device *lt9611uxc_attach_dsi(struct lt9611uxc *lt9611uxc, } static int lt9611uxc_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct lt9611uxc *lt9611uxc = bridge_to_lt9611uxc(bridge); - return drm_bridge_attach(bridge->encoder, lt9611uxc->next_bridge, + return drm_bridge_attach(encoder, lt9611uxc->next_bridge, bridge, flags); } @@ -774,9 +775,9 @@ static int lt9611uxc_probe(struct i2c_client *client) return -ENODEV; } - lt9611uxc = devm_kzalloc(dev, sizeof(*lt9611uxc), GFP_KERNEL); - if (!lt9611uxc) - return -ENOMEM; + lt9611uxc = devm_drm_bridge_alloc(dev, struct lt9611uxc, bridge, <9611uxc_bridge_funcs); + if (IS_ERR(lt9611uxc)) + return PTR_ERR(lt9611uxc); lt9611uxc->dev = dev; lt9611uxc->client = client; @@ -855,7 +856,6 @@ retry: i2c_set_clientdata(client, lt9611uxc); - lt9611uxc->bridge.funcs = <9611uxc_bridge_funcs; lt9611uxc->bridge.of_node = client->dev.of_node; lt9611uxc->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; if (lt9611uxc->hpd_supported) @@ -880,7 +880,11 @@ retry: } } - return lt9611uxc_audio_init(dev, lt9611uxc); + ret = lt9611uxc_audio_init(dev, lt9611uxc); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: free_irq(client->irq, lt9611uxc); diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c index 389af0233fcd..1646e454e0b0 100644 --- a/drivers/gpu/drm/bridge/lvds-codec.c +++ b/drivers/gpu/drm/bridge/lvds-codec.c @@ -34,11 +34,12 @@ static inline struct lvds_codec *to_lvds_codec(struct drm_bridge *bridge) } static int lvds_codec_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct lvds_codec *lvds_codec = to_lvds_codec(bridge); - return drm_bridge_attach(bridge->encoder, lvds_codec->panel_bridge, + return drm_bridge_attach(encoder, lvds_codec->panel_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index a47aabf134fd..15a5a1f644fc 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -190,6 +190,7 @@ static irqreturn_t ge_b850v3_lvds_irq_handler(int irq, void *dev_id) } static int ge_b850v3_lvds_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct i2c_client *stdp4028_i2c diff --git a/drivers/gpu/drm/bridge/microchip-lvds.c b/drivers/gpu/drm/bridge/microchip-lvds.c index 53dd140a1b8d..1d4ae0097df8 100644 --- a/drivers/gpu/drm/bridge/microchip-lvds.c +++ b/drivers/gpu/drm/bridge/microchip-lvds.c @@ -104,11 +104,12 @@ static void lvds_serialiser_on(struct mchp_lvds *lvds) } static int mchp_lvds_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct mchp_lvds *lvds = bridge_to_lvds(bridge); - return drm_bridge_attach(bridge->encoder, lvds->panel_bridge, + return drm_bridge_attach(encoder, lvds->panel_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index d04c62a0cb9f..55912ae11f46 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -910,6 +910,7 @@ static void nwl_dsi_bridge_atomic_enable(struct drm_bridge *bridge, } static int nwl_dsi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct nwl_dsi *dsi = bridge_to_dsi(bridge); @@ -919,7 +920,7 @@ static int nwl_dsi_bridge_attach(struct drm_bridge *bridge, if (IS_ERR(panel_bridge)) return PTR_ERR(panel_bridge); - return drm_bridge_attach(bridge->encoder, panel_bridge, bridge, flags); + return drm_bridge_attach(encoder, panel_bridge, bridge, flags); } static u32 *nwl_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 27261b2ac9c8..25d7c415478b 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -214,13 +214,14 @@ static const struct drm_connector_funcs ptn3460_connector_funcs = { }; static int ptn3460_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ptn3460_bridge *ptn_bridge = bridge_to_ptn3460(bridge); int ret; /* Let this driver create connector if requested */ - ret = drm_bridge_attach(bridge->encoder, ptn_bridge->panel_bridge, + ret = drm_bridge_attach(encoder, ptn_bridge->panel_bridge, bridge, flags | DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) return ret; @@ -239,7 +240,7 @@ static int ptn3460_bridge_attach(struct drm_bridge *bridge, &ptn3460_connector_helper_funcs); drm_connector_register(&ptn_bridge->connector); drm_connector_attach_encoder(&ptn_bridge->connector, - bridge->encoder); + encoder); drm_helper_hpd_irq_event(ptn_bridge->connector.dev); diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 258c85c83a28..79b009ab9396 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -58,6 +58,7 @@ static const struct drm_connector_funcs panel_bridge_connector_funcs = { }; static int panel_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); @@ -81,7 +82,7 @@ static int panel_bridge_attach(struct drm_bridge *bridge, drm_panel_bridge_set_orientation(connector, bridge); drm_connector_attach_encoder(&panel_bridge->connector, - bridge->encoder); + encoder); if (bridge->dev->registered) { if (connector->funcs->reset) diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c index 13ada42a5514..8726fefc5c65 100644 --- a/drivers/gpu/drm/bridge/parade-ps8622.c +++ b/drivers/gpu/drm/bridge/parade-ps8622.c @@ -418,6 +418,7 @@ static void ps8622_post_disable(struct drm_bridge *bridge) } static int ps8622_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ps8622_bridge *ps8622 = bridge_to_ps8622(bridge); diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c index a42138b33258..2422ff68c104 100644 --- a/drivers/gpu/drm/bridge/parade-ps8640.c +++ b/drivers/gpu/drm/bridge/parade-ps8640.c @@ -494,6 +494,7 @@ static void ps8640_atomic_post_disable(struct drm_bridge *bridge, } static int ps8640_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ps8640 *ps_bridge = bridge_to_ps8640(bridge); @@ -518,7 +519,7 @@ static int ps8640_bridge_attach(struct drm_bridge *bridge, } /* Attach the panel-bridge to the dsi bridge */ - ret = drm_bridge_attach(bridge->encoder, ps_bridge->panel_bridge, + ret = drm_bridge_attach(encoder, ps_bridge->panel_bridge, &ps_bridge->bridge, flags); if (ret) goto err_bridge_attach; diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 54de6ed2fae8..0014c497e3fe 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1640,11 +1640,12 @@ static void samsung_dsim_mode_set(struct drm_bridge *bridge, } static int samsung_dsim_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct samsung_dsim *dsi = bridge_to_dsi(bridge); - return drm_bridge_attach(bridge->encoder, dsi->out_bridge, bridge, + return drm_bridge_attach(encoder, dsi->out_bridge, bridge, flags); } @@ -1935,9 +1936,9 @@ int samsung_dsim_probe(struct platform_device *pdev) struct samsung_dsim *dsi; int ret, i; - dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); - if (!dsi) - return -ENOMEM; + dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs); + if (IS_ERR(dsi)) + return PTR_ERR(dsi); init_completion(&dsi->completed); spin_lock_init(&dsi->transfer_lock); @@ -2007,7 +2008,6 @@ int samsung_dsim_probe(struct platform_device *pdev) pm_runtime_enable(dev); - dsi->bridge.funcs = &samsung_dsim_bridge_funcs; dsi->bridge.of_node = dev->of_node; dsi->bridge.type = DRM_MODE_CONNECTOR_DSI; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 914a2609a685..6de61d9fe064 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -416,6 +416,7 @@ out: } static int sii902x_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct sii902x *sii902x = bridge_to_sii902x(bridge); @@ -424,7 +425,7 @@ static int sii902x_bridge_attach(struct drm_bridge *bridge, int ret; if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) - return drm_bridge_attach(bridge->encoder, sii902x->next_bridge, + return drm_bridge_attach(encoder, sii902x->next_bridge, bridge, flags); drm_connector_helper_add(&sii902x->connector, @@ -452,7 +453,7 @@ static int sii902x_bridge_attach(struct drm_bridge *bridge, if (ret) return ret; - drm_connector_attach_encoder(&sii902x->connector, bridge->encoder); + drm_connector_attach_encoder(&sii902x->connector, encoder); return 0; } @@ -1138,6 +1139,7 @@ static int sii902x_init(struct sii902x *sii902x) sii902x->bridge.of_node = dev->of_node; sii902x->bridge.timings = &default_sii902x_timings; sii902x->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + sii902x->bridge.type = DRM_MODE_CONNECTOR_HDMIA; if (sii902x->i2c->irq > 0) sii902x->bridge.ops |= DRM_BRIDGE_OP_HPD; diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 28a2e1ee04b2..3af650dc92a1 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -2203,6 +2203,7 @@ static inline struct sii8620 *bridge_to_sii8620(struct drm_bridge *bridge) } static int sii8620_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct sii8620 *ctx = bridge_to_sii8620(bridge); diff --git a/drivers/gpu/drm/bridge/simple-bridge.c b/drivers/gpu/drm/bridge/simple-bridge.c index ab0b0e36e97a..70db5b99e5bb 100644 --- a/drivers/gpu/drm/bridge/simple-bridge.c +++ b/drivers/gpu/drm/bridge/simple-bridge.c @@ -103,12 +103,13 @@ static const struct drm_connector_funcs simple_bridge_con_funcs = { }; static int simple_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct simple_bridge *sbridge = drm_bridge_to_simple_bridge(bridge); int ret; - ret = drm_bridge_attach(bridge->encoder, sbridge->next_bridge, bridge, + ret = drm_bridge_attach(encoder, sbridge->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) return ret; @@ -127,7 +128,7 @@ static int simple_bridge_attach(struct drm_bridge *bridge, return ret; } - drm_connector_attach_encoder(&sbridge->connector, bridge->encoder); + drm_connector_attach_encoder(&sbridge->connector, encoder); return 0; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index 6166f197e37b..5e5f8c2f95be 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -1077,6 +1077,7 @@ struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev, hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HDMI | + DRM_BRIDGE_OP_HDMI_AUDIO | DRM_BRIDGE_OP_HPD; hdmi->bridge.of_node = pdev->dev.of_node; hdmi->bridge.type = DRM_MODE_CONNECTOR_HDMIA; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 0890add5f707..8791408dd1ff 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -22,8 +22,8 @@ #include -#include -#include +#include +#include #include #include @@ -2889,12 +2889,13 @@ static int dw_hdmi_bridge_atomic_check(struct drm_bridge *bridge, } static int dw_hdmi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct dw_hdmi *hdmi = bridge->driver_private; if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) - return drm_bridge_attach(bridge->encoder, hdmi->next_bridge, + return drm_bridge_attach(encoder, hdmi->next_bridge, bridge, flags); return dw_hdmi_connector_create(hdmi); @@ -3332,9 +3333,9 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, u8 config0; u8 config3; - hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL); - if (!hdmi) - return ERR_PTR(-ENOMEM); + hdmi = devm_drm_bridge_alloc(dev, struct dw_hdmi, bridge, &dw_hdmi_bridge_funcs); + if (IS_ERR(hdmi)) + return hdmi; hdmi->plat_data = plat_data; hdmi->dev = dev; @@ -3494,7 +3495,6 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, } hdmi->bridge.driver_private = hdmi; - hdmi->bridge.funcs = &dw_hdmi_bridge_funcs; hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD; hdmi->bridge.interlace_allowed = true; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 2b6e70a49f43..b08ada920a50 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1072,15 +1072,16 @@ dw_mipi_dsi_bridge_mode_valid(struct drm_bridge *bridge, } static int dw_mipi_dsi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge); /* Set the encoder type as caller does not know it */ - bridge->encoder->encoder_type = DRM_MODE_ENCODER_DSI; + encoder->encoder_type = DRM_MODE_ENCODER_DSI; /* Attach the panel-bridge to the dsi bridge */ - return drm_bridge_attach(bridge->encoder, dsi->panel_bridge, bridge, + return drm_bridge_attach(encoder, dsi->panel_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c index 5fd7a459efdd..c76f5f2e74d1 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c @@ -870,15 +870,16 @@ dw_mipi_dsi2_bridge_mode_valid(struct drm_bridge *bridge, } static int dw_mipi_dsi2_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); /* Set the encoder type as caller does not know it */ - bridge->encoder->encoder_type = DRM_MODE_ENCODER_DSI; + encoder->encoder_type = DRM_MODE_ENCODER_DSI; /* Attach the panel-bridge to the dsi bridge */ - return drm_bridge_attach(bridge->encoder, dsi2->panel_bridge, bridge, + return drm_bridge_attach(encoder, dsi2->panel_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/tc358762.c b/drivers/gpu/drm/bridge/tc358762.c index 49c76027f831..edf01476f2ef 100644 --- a/drivers/gpu/drm/bridge/tc358762.c +++ b/drivers/gpu/drm/bridge/tc358762.c @@ -202,11 +202,12 @@ static void tc358762_enable(struct drm_bridge *bridge, } static int tc358762_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tc358762 *ctx = bridge_to_tc358762(bridge); - return drm_bridge_attach(bridge->encoder, ctx->panel_bridge, + return drm_bridge_attach(encoder, ctx->panel_bridge, bridge, flags); } diff --git a/drivers/gpu/drm/bridge/tc358764.c b/drivers/gpu/drm/bridge/tc358764.c index 3d3d135b4348..3f76c890fad9 100644 --- a/drivers/gpu/drm/bridge/tc358764.c +++ b/drivers/gpu/drm/bridge/tc358764.c @@ -295,11 +295,12 @@ static void tc358764_pre_enable(struct drm_bridge *bridge) } static int tc358764_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tc358764 *ctx = bridge_to_tc358764(bridge); - return drm_bridge_attach(bridge->encoder, ctx->next_bridge, bridge, flags); + return drm_bridge_attach(encoder, ctx->next_bridge, bridge, flags); } static const struct drm_bridge_funcs tc358764_bridge_funcs = { diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 39e2d3a7a27d..7e5449fb86a3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1795,6 +1795,7 @@ static const struct drm_connector_funcs tc_connector_funcs = { }; static int tc_dpi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tc_data *tc = bridge_to_tc(bridge); @@ -1807,6 +1808,7 @@ static int tc_dpi_bridge_attach(struct drm_bridge *bridge, } static int tc_edp_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index ec79b0dd0e2c..063f217a17b6 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -554,6 +554,7 @@ static const struct mipi_dsi_host_ops tc358768_dsi_host_ops = { }; static int tc358768_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tc358768_priv *priv = bridge_to_tc358768(bridge); @@ -563,7 +564,7 @@ static int tc358768_bridge_attach(struct drm_bridge *bridge, return -ENOTSUPP; } - return drm_bridge_attach(bridge->encoder, priv->output.bridge, bridge, + return drm_bridge_attach(encoder, priv->output.bridge, bridge, flags); } @@ -580,7 +581,8 @@ tc358768_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static void tc358768_bridge_disable(struct drm_bridge *bridge) +static void tc358768_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc358768_priv *priv = bridge_to_tc358768(bridge); int ret; @@ -602,7 +604,8 @@ static void tc358768_bridge_disable(struct drm_bridge *bridge) dev_warn(priv->dev, "Software disable failed: %d\n", ret); } -static void tc358768_bridge_post_disable(struct drm_bridge *bridge) +static void tc358768_bridge_atomic_post_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc358768_priv *priv = bridge_to_tc358768(bridge); @@ -682,13 +685,17 @@ static u32 tc358768_dsi_bytes_to_ns(struct tc358768_priv *priv, u32 val) return (u32)div_u64(m, n); } -static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) +static void tc358768_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc358768_priv *priv = bridge_to_tc358768(bridge); struct mipi_dsi_device *dsi_dev = priv->output.dev; unsigned long mode_flags = dsi_dev->mode_flags; u32 val, val2, lptxcnt, hact, data_type; s32 raw_val; + struct drm_crtc_state *crtc_state; + struct drm_connector_state *conn_state; + struct drm_connector *connector; const struct drm_display_mode *mode; u32 hsbyteclk_ps, dsiclk_ps, ui_ps; u32 dsiclk, hsbyteclk; @@ -719,7 +726,10 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) return; } - mode = &bridge->encoder->crtc->state->adjusted_mode; + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + conn_state = drm_atomic_get_new_connector_state(state, connector); + crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + mode = &crtc_state->adjusted_mode; ret = tc358768_setup_pll(priv, mode); if (ret) { dev_err(dev, "PLL setup failed: %d\n", ret); @@ -1076,14 +1086,12 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) tc358768_write(priv, TC358768_DSI_CONFW, val); ret = tc358768_clear_error(priv); - if (ret) { + if (ret) dev_err(dev, "Bridge pre_enable failed: %d\n", ret); - tc358768_bridge_disable(bridge); - tc358768_bridge_post_disable(bridge); - } } -static void tc358768_bridge_enable(struct drm_bridge *bridge) +static void tc358768_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc358768_priv *priv = bridge_to_tc358768(bridge); int ret; @@ -1100,11 +1108,8 @@ static void tc358768_bridge_enable(struct drm_bridge *bridge) tc358768_update_bits(priv, TC358768_CONFCTL, BIT(6), BIT(6)); ret = tc358768_clear_error(priv); - if (ret) { + if (ret) dev_err(priv->dev, "Bridge enable failed: %d\n", ret); - tc358768_bridge_disable(bridge); - tc358768_bridge_post_disable(bridge); - } } #define MAX_INPUT_SEL_FORMATS 1 @@ -1166,10 +1171,10 @@ static const struct drm_bridge_funcs tc358768_bridge_funcs = { .attach = tc358768_bridge_attach, .mode_valid = tc358768_bridge_mode_valid, .mode_fixup = tc358768_mode_fixup, - .pre_enable = tc358768_bridge_pre_enable, - .enable = tc358768_bridge_enable, - .disable = tc358768_bridge_disable, - .post_disable = tc358768_bridge_post_disable, + .atomic_pre_enable = tc358768_bridge_atomic_pre_enable, + .atomic_enable = tc358768_bridge_atomic_enable, + .atomic_disable = tc358768_bridge_atomic_disable, + .atomic_post_disable = tc358768_bridge_atomic_post_disable, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, diff --git a/drivers/gpu/drm/bridge/tc358775.c b/drivers/gpu/drm/bridge/tc358775.c index c89757bec4e6..1b10e6ee1724 100644 --- a/drivers/gpu/drm/bridge/tc358775.c +++ b/drivers/gpu/drm/bridge/tc358775.c @@ -286,7 +286,8 @@ static inline struct tc_data *bridge_to_tc(struct drm_bridge *b) return container_of(b, struct tc_data, bridge); } -static void tc_bridge_pre_enable(struct drm_bridge *bridge) +static void tc_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc_data *tc = bridge_to_tc(bridge); struct device *dev = &tc->dsi->dev; @@ -309,7 +310,8 @@ static void tc_bridge_pre_enable(struct drm_bridge *bridge) usleep_range(10, 20); } -static void tc_bridge_post_disable(struct drm_bridge *bridge) +static void tc_bridge_atomic_post_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc_data *tc = bridge_to_tc(bridge); struct device *dev = &tc->dsi->dev; @@ -368,30 +370,21 @@ static void d2l_write(struct i2c_client *i2c, u16 addr, u32 val) ret, addr); } -/* helper function to access bus_formats */ -static struct drm_connector *get_connector(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct drm_connector *connector; - - list_for_each_entry(connector, &dev->mode_config.connector_list, head) - if (connector->encoder == encoder) - return connector; - - return NULL; -} - -static void tc_bridge_enable(struct drm_bridge *bridge) +static void tc_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) { struct tc_data *tc = bridge_to_tc(bridge); u32 hback_porch, hsync_len, hfront_porch, hactive, htime1, htime2; u32 vback_porch, vsync_len, vfront_porch, vactive, vtime1, vtime2; u32 val = 0; u16 dsiclk, clkdiv, byteclk, t1, t2, t3, vsdelay; - struct drm_display_mode *mode; - struct drm_connector *connector = get_connector(bridge->encoder); - - mode = &bridge->encoder->crtc->state->adjusted_mode; + struct drm_connector *connector = + drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(state, connector); + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, conn_state->crtc); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; hback_porch = mode->htotal - mode->hsync_end; hsync_len = mode->hsync_end - mode->hsync_start; @@ -589,21 +582,25 @@ static int tc358775_parse_dt(struct device_node *np, struct tc_data *tc) } static int tc_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tc_data *tc = bridge_to_tc(bridge); /* Attach the panel-bridge to the dsi bridge */ - return drm_bridge_attach(bridge->encoder, tc->panel_bridge, + return drm_bridge_attach(encoder, tc->panel_bridge, &tc->bridge, flags); } static const struct drm_bridge_funcs tc_bridge_funcs = { .attach = tc_bridge_attach, - .pre_enable = tc_bridge_pre_enable, - .enable = tc_bridge_enable, + .atomic_pre_enable = tc_bridge_atomic_pre_enable, + .atomic_enable = tc_bridge_atomic_enable, .mode_valid = tc_mode_valid, - .post_disable = tc_bridge_post_disable, + .atomic_post_disable = tc_bridge_atomic_post_disable, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, }; static int tc_attach_host(struct tc_data *tc) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index 20658258fb51..850909f78a7b 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1365,6 +1365,7 @@ static int tda998x_connector_init(struct tda998x_priv *priv, /* DRM bridge functions */ static int tda998x_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tda998x_priv *priv = bridge_to_tda998x_priv(bridge); @@ -1780,9 +1781,9 @@ static int tda998x_create(struct device *dev) u32 video; int rev_lo, rev_hi, ret; - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + priv = devm_drm_bridge_alloc(dev, struct tda998x_priv, bridge, &tda998x_bridge_funcs); + if (IS_ERR(priv)) + return PTR_ERR(priv); dev_set_drvdata(dev, priv); @@ -1947,7 +1948,6 @@ static int tda998x_create(struct device *dev) tda998x_audio_codec_init(priv, &client->dev); } - priv->bridge.funcs = &tda998x_bridge_funcs; #ifdef CONFIG_OF priv->bridge.of_node = dev->of_node; #endif diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c index bba10cf9b4f9..e2fc78adebcf 100644 --- a/drivers/gpu/drm/bridge/thc63lvd1024.c +++ b/drivers/gpu/drm/bridge/thc63lvd1024.c @@ -43,11 +43,12 @@ static inline struct thc63_dev *to_thc63(struct drm_bridge *bridge) } static int thc63_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct thc63_dev *thc63 = to_thc63(bridge); - return drm_bridge_attach(bridge->encoder, thc63->next, bridge, flags); + return drm_bridge_attach(encoder, thc63->next, bridge, flags); } static enum drm_mode_status thc63_mode_valid(struct drm_bridge *bridge, diff --git a/drivers/gpu/drm/bridge/ti-dlpc3433.c b/drivers/gpu/drm/bridge/ti-dlpc3433.c index 85f2a0e74a1c..47638d1c96ec 100644 --- a/drivers/gpu/drm/bridge/ti-dlpc3433.c +++ b/drivers/gpu/drm/bridge/ti-dlpc3433.c @@ -242,12 +242,12 @@ static void dlpc_mode_set(struct drm_bridge *bridge, drm_mode_copy(&dlpc->mode, adjusted_mode); } -static int dlpc_attach(struct drm_bridge *bridge, +static int dlpc_attach(struct drm_bridge *bridge, struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct dlpc *dlpc = bridge_to_dlpc(bridge); - return drm_bridge_attach(bridge->encoder, dlpc->next_bridge, bridge, flags); + return drm_bridge_attach(encoder, dlpc->next_bridge, bridge, flags); } static const struct drm_bridge_funcs dlpc_bridge_funcs = { diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 95563aa1b450..033c44326552 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -40,7 +40,7 @@ #include #include -#include /* DRM_MODESET_LOCK_ALL_BEGIN() needs drm_drv_uses_atomic_modeset() */ +#include #include #include #include @@ -290,11 +290,12 @@ static struct sn65dsi83 *bridge_to_sn65dsi83(struct drm_bridge *bridge) } static int sn65dsi83_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); - return drm_bridge_attach(bridge->encoder, ctx->panel_bridge, + return drm_bridge_attach(encoder, ctx->panel_bridge, &ctx->bridge, flags); } @@ -370,7 +371,6 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx) static int sn65dsi83_reset_pipe(struct sn65dsi83 *sn65dsi83) { - struct drm_device *dev = sn65dsi83->bridge.dev; struct drm_modeset_acquire_ctx ctx; int err; @@ -385,26 +385,21 @@ static int sn65dsi83_reset_pipe(struct sn65dsi83 *sn65dsi83) * Keep the lock during the whole operation to be atomic. */ - DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, err); - - if (!sn65dsi83->bridge.encoder->crtc) { - /* - * No CRTC attached -> No CRTC active outputs to reset - * This can happen when the SN65DSI83 is reset. Simply do - * nothing without returning any errors. - */ - err = 0; - goto end; - } + drm_modeset_acquire_init(&ctx, 0); dev_warn(sn65dsi83->dev, "reset the pipe\n"); - err = drm_atomic_helper_reset_crtc(sn65dsi83->bridge.encoder->crtc, &ctx); +retry: + err = drm_bridge_helper_reset_crtc(&sn65dsi83->bridge, &ctx); + if (err == -EDEADLK) { + drm_modeset_backoff(&ctx); + goto retry; + } -end: - DRM_MODESET_LOCK_ALL_END(dev, ctx, err); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); - return err; + return 0; } static void sn65dsi83_reset_work(struct work_struct *ws) @@ -946,9 +941,9 @@ static int sn65dsi83_probe(struct i2c_client *client) struct sn65dsi83 *ctx; int ret; - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + ctx = devm_drm_bridge_alloc(dev, struct sn65dsi83, bridge, &sn65dsi83_funcs); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ctx->dev = dev; INIT_WORK(&ctx->reset_work, sn65dsi83_reset_work); @@ -988,7 +983,6 @@ static int sn65dsi83_probe(struct i2c_client *client) dev_set_drvdata(dev, ctx); i2c_set_clientdata(client, ctx); - ctx->bridge.funcs = &sn65dsi83_funcs; ctx->bridge.of_node = dev->of_node; ctx->bridge.pre_enable_prev_first = true; ctx->bridge.type = DRM_MODE_CONNECTOR_LVDS; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 01d456b955ab..60224f476e1d 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -35,6 +35,7 @@ #include #include +#define SN_DEVICE_ID_REGS 0x00 /* up to 0x07 */ #define SN_DEVICE_REV_REG 0x08 #define SN_DPPLL_SRC_REG 0x0A #define DPPLL_CLK_SRC_DSICLK BIT(0) @@ -243,11 +244,26 @@ static void ti_sn65dsi86_write_u16(struct ti_sn65dsi86 *pdata, regmap_bulk_write(pdata->regmap, reg, buf, ARRAY_SIZE(buf)); } -static u32 ti_sn_bridge_get_dsi_freq(struct ti_sn65dsi86 *pdata) +static struct drm_display_mode * +get_new_adjusted_display_mode(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct drm_connector *connector = + drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + struct drm_connector_state *conn_state = + drm_atomic_get_new_connector_state(state, connector); + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, conn_state->crtc); + + return &crtc_state->adjusted_mode; +} + +static u32 ti_sn_bridge_get_dsi_freq(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state) { u32 bit_rate_khz, clk_freq_khz; struct drm_display_mode *mode = - &pdata->bridge.encoder->crtc->state->adjusted_mode; + get_new_adjusted_display_mode(&pdata->bridge, state); bit_rate_khz = mode->clock * mipi_dsi_pixel_format_to_bpp(pdata->dsi->format); @@ -274,7 +290,8 @@ static const u32 ti_sn_bridge_dsiclk_lut[] = { 460800000, }; -static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata) +static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state) { int i; u32 refclk_rate; @@ -287,7 +304,7 @@ static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata) refclk_lut_size = ARRAY_SIZE(ti_sn_bridge_refclk_lut); clk_prepare_enable(pdata->refclk); } else { - refclk_rate = ti_sn_bridge_get_dsi_freq(pdata) * 1000; + refclk_rate = ti_sn_bridge_get_dsi_freq(pdata, state) * 1000; refclk_lut = ti_sn_bridge_dsiclk_lut; refclk_lut_size = ARRAY_SIZE(ti_sn_bridge_dsiclk_lut); } @@ -311,12 +328,13 @@ static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata) pdata->pwm_refclk_freq = ti_sn_bridge_refclk_lut[i]; } -static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata) +static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state) { mutex_lock(&pdata->comms_mutex); /* configure bridge ref_clk */ - ti_sn_bridge_set_refclk_freq(pdata); + ti_sn_bridge_set_refclk_freq(pdata, state); /* * HPD on this bridge chip is a bit useless. This is an eDP bridge @@ -376,7 +394,7 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev) * clock so reading early doesn't work. */ if (pdata->refclk) - ti_sn65dsi86_enable_comms(pdata); + ti_sn65dsi86_enable_comms(pdata, NULL); return ret; } @@ -423,36 +441,8 @@ static int status_show(struct seq_file *s, void *data) return 0; } - DEFINE_SHOW_ATTRIBUTE(status); -static void ti_sn65dsi86_debugfs_remove(void *data) -{ - debugfs_remove_recursive(data); -} - -static void ti_sn65dsi86_debugfs_init(struct ti_sn65dsi86 *pdata) -{ - struct device *dev = pdata->dev; - struct dentry *debugfs; - int ret; - - debugfs = debugfs_create_dir(dev_name(dev), NULL); - - /* - * We might get an error back if debugfs wasn't enabled in the kernel - * so let's just silently return upon failure. - */ - if (IS_ERR_OR_NULL(debugfs)) - return; - - ret = devm_add_action_or_reset(dev, ti_sn65dsi86_debugfs_remove, debugfs); - if (ret) - return; - - debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); -} - /* ----------------------------------------------------------------------------- * Auxiliary Devices (*not* AUX) */ @@ -732,6 +722,7 @@ static int ti_sn_attach_host(struct auxiliary_device *adev, struct ti_sn65dsi86 } static int ti_sn_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); @@ -748,7 +739,7 @@ static int ti_sn_bridge_attach(struct drm_bridge *bridge, * Attach the next bridge. * We never want the next bridge to *also* create a connector. */ - ret = drm_bridge_attach(bridge->encoder, pdata->next_bridge, + ret = drm_bridge_attach(encoder, pdata->next_bridge, &pdata->bridge, flags | DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) goto err_initted_aux; @@ -821,12 +812,13 @@ static void ti_sn_bridge_atomic_disable(struct drm_bridge *bridge, regmap_update_bits(pdata->regmap, SN_ENH_FRAME_REG, VSTREAM_ENABLE, 0); } -static void ti_sn_bridge_set_dsi_rate(struct ti_sn65dsi86 *pdata) +static void ti_sn_bridge_set_dsi_rate(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state) { unsigned int bit_rate_mhz, clk_freq_mhz; unsigned int val; struct drm_display_mode *mode = - &pdata->bridge.encoder->crtc->state->adjusted_mode; + get_new_adjusted_display_mode(&pdata->bridge, state); /* set DSIA clk frequency */ bit_rate_mhz = (mode->clock / 1000) * @@ -856,12 +848,14 @@ static const unsigned int ti_sn_bridge_dp_rate_lut[] = { 0, 1620, 2160, 2430, 2700, 3240, 4320, 5400 }; -static int ti_sn_bridge_calc_min_dp_rate_idx(struct ti_sn65dsi86 *pdata, unsigned int bpp) +static int ti_sn_bridge_calc_min_dp_rate_idx(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state, + unsigned int bpp) { unsigned int bit_rate_khz, dp_rate_mhz; unsigned int i; struct drm_display_mode *mode = - &pdata->bridge.encoder->crtc->state->adjusted_mode; + get_new_adjusted_display_mode(&pdata->bridge, state); /* Calculate minimum bit rate based on our pixel clock. */ bit_rate_khz = mode->clock * bpp; @@ -960,10 +954,11 @@ static unsigned int ti_sn_bridge_read_valid_rates(struct ti_sn65dsi86 *pdata) return valid_rates; } -static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) +static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata, + struct drm_atomic_state *state) { struct drm_display_mode *mode = - &pdata->bridge.encoder->crtc->state->adjusted_mode; + get_new_adjusted_display_mode(&pdata->bridge, state); u8 hsync_polarity = 0, vsync_polarity = 0; if (mode->flags & DRM_MODE_FLAG_NHSYNC) @@ -1105,7 +1100,7 @@ static void ti_sn_bridge_atomic_enable(struct drm_bridge *bridge, pdata->ln_polrs << LN_POLRS_OFFSET); /* set dsi clk frequency value */ - ti_sn_bridge_set_dsi_rate(pdata); + ti_sn_bridge_set_dsi_rate(pdata, state); /* * The SN65DSI86 only supports ASSR Display Authentication method and @@ -1140,7 +1135,7 @@ static void ti_sn_bridge_atomic_enable(struct drm_bridge *bridge, valid_rates = ti_sn_bridge_read_valid_rates(pdata); /* Train until we run out of rates */ - for (dp_rate_idx = ti_sn_bridge_calc_min_dp_rate_idx(pdata, bpp); + for (dp_rate_idx = ti_sn_bridge_calc_min_dp_rate_idx(pdata, state, bpp); dp_rate_idx < ARRAY_SIZE(ti_sn_bridge_dp_rate_lut); dp_rate_idx++) { if (!(valid_rates & BIT(dp_rate_idx))) @@ -1156,7 +1151,7 @@ static void ti_sn_bridge_atomic_enable(struct drm_bridge *bridge, } /* config video parameters */ - ti_sn_bridge_set_video_timings(pdata); + ti_sn_bridge_set_video_timings(pdata, state); /* enable video stream */ regmap_update_bits(pdata->regmap, SN_ENH_FRAME_REG, VSTREAM_ENABLE, @@ -1171,7 +1166,7 @@ static void ti_sn_bridge_atomic_pre_enable(struct drm_bridge *bridge, pm_runtime_get_sync(pdata->dev); if (!pdata->refclk) - ti_sn65dsi86_enable_comms(pdata); + ti_sn65dsi86_enable_comms(pdata, state); /* td7: min 100 us after enable before DSI data */ usleep_range(100, 110); @@ -1216,6 +1211,15 @@ static const struct drm_edid *ti_sn_bridge_edid_read(struct drm_bridge *bridge, return drm_edid_read_ddc(connector, &pdata->aux.ddc); } +static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry *root) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + struct dentry *debugfs; + + debugfs = debugfs_create_dir(dev_name(pdata->dev), root); + debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1229,6 +1233,7 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_reset = drm_atomic_helper_bridge_reset, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .debugfs_init = ti_sn65dsi86_debugfs_init, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1312,7 +1317,6 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, if (ret) return ret; - pdata->bridge.funcs = &ti_sn_bridge_funcs; pdata->bridge.of_node = np; pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; @@ -1894,6 +1898,7 @@ static int ti_sn65dsi86_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct ti_sn65dsi86 *pdata; + u8 id_buf[8]; int ret; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { @@ -1901,9 +1906,9 @@ static int ti_sn65dsi86_probe(struct i2c_client *client) return -ENODEV; } - pdata = devm_kzalloc(dev, sizeof(struct ti_sn65dsi86), GFP_KERNEL); - if (!pdata) - return -ENOMEM; + pdata = devm_drm_bridge_alloc(dev, struct ti_sn65dsi86, bridge, &ti_sn_bridge_funcs); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); dev_set_drvdata(dev, pdata); pdata->dev = dev; @@ -1937,7 +1942,15 @@ static int ti_sn65dsi86_probe(struct i2c_client *client) if (ret) return ret; - ti_sn65dsi86_debugfs_init(pdata); + pm_runtime_get_sync(dev); + ret = regmap_bulk_read(pdata->regmap, SN_DEVICE_ID_REGS, id_buf, ARRAY_SIZE(id_buf)); + pm_runtime_put_autosuspend(dev); + if (ret) + return dev_err_probe(dev, ret, "failed to read device id\n"); + + /* The ID string is stored backwards */ + if (strncmp(id_buf, "68ISD ", ARRAY_SIZE(id_buf))) + return dev_err_probe(dev, -EOPNOTSUPP, "unsupported device id\n"); /* * Break ourselves up into a collection of aux devices. The only real diff --git a/drivers/gpu/drm/bridge/ti-tdp158.c b/drivers/gpu/drm/bridge/ti-tdp158.c index 22316382451f..cca75443f012 100644 --- a/drivers/gpu/drm/bridge/ti-tdp158.c +++ b/drivers/gpu/drm/bridge/ti-tdp158.c @@ -45,11 +45,13 @@ static void tdp158_disable(struct drm_bridge *bridge, regulator_disable(tdp158->vcc); } -static int tdp158_attach(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags) +static int tdp158_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) { struct tdp158 *tdp158 = bridge->driver_private; - return drm_bridge_attach(bridge->encoder, tdp158->next, bridge, flags); + return drm_bridge_attach(encoder, tdp158->next, bridge, flags); } static const struct drm_bridge_funcs tdp158_bridge_funcs = { diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 79ab5da827e1..e15d232ddbac 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -120,12 +120,13 @@ static void tfp410_hpd_callback(void *arg, enum drm_connector_status status) } static int tfp410_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tfp410 *dvi = drm_bridge_to_tfp410(bridge); int ret; - ret = drm_bridge_attach(bridge->encoder, dvi->next_bridge, bridge, + ret = drm_bridge_attach(encoder, dvi->next_bridge, bridge, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret < 0) return ret; @@ -159,7 +160,7 @@ static int tfp410_attach(struct drm_bridge *bridge, drm_display_info_set_bus_formats(&dvi->connector.display_info, &dvi->bus_format, 1); - drm_connector_attach_encoder(&dvi->connector, bridge->encoder); + drm_connector_attach_encoder(&dvi->connector, encoder); return 0; } diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c index 47b74cb25b14..1c289051a598 100644 --- a/drivers/gpu/drm/bridge/ti-tpd12s015.c +++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c @@ -38,6 +38,7 @@ static inline struct tpd12s015_device *to_tpd12s015(struct drm_bridge *bridge) } static int tpd12s015_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { struct tpd12s015_device *tpd = to_tpd12s015(bridge); @@ -46,7 +47,7 @@ static int tpd12s015_attach(struct drm_bridge *bridge, if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) return -EINVAL; - ret = drm_bridge_attach(bridge->encoder, tpd->next_bridge, + ret = drm_bridge_attach(encoder, tpd->next_bridge, bridge, flags); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/ci/arm64.config b/drivers/gpu/drm/ci/arm64.config index a8fca079921b..fddfbd4d2493 100644 --- a/drivers/gpu/drm/ci/arm64.config +++ b/drivers/gpu/drm/ci/arm64.config @@ -193,6 +193,8 @@ CONFIG_PWM_MTK_DISP=y CONFIG_MTK_CMDQ=y CONFIG_REGULATOR_DA9211=y CONFIG_DRM_ANALOGIX_ANX7625=y +CONFIG_PHY_MTK_HDMI=y +CONFIG_PHY_MTK_MIPI_DSI=y # For nouveau. Note that DRM must be a module so that it's loaded after NFS is up to provide the firmware. CONFIG_ARCH_TEGRA=y diff --git a/drivers/gpu/drm/ci/build-igt.sh b/drivers/gpu/drm/ci/build-igt.sh index eddb5f782a5e..caa2f4804ed5 100644 --- a/drivers/gpu/drm/ci/build-igt.sh +++ b/drivers/gpu/drm/ci/build-igt.sh @@ -71,4 +71,4 @@ tar -cf artifacts/igt.tar /igt # Pass needed files to the test stage S3_ARTIFACT_NAME="igt.tar.gz" gzip -c artifacts/igt.tar > ${S3_ARTIFACT_NAME} -ci-fairy s3cp --token-file "${S3_JWT_FILE}" ${S3_ARTIFACT_NAME} https://${PIPELINE_ARTIFACTS_BASE}/${KERNEL_ARCH}/${S3_ARTIFACT_NAME} +s3_upload ${S3_ARTIFACT_NAME} https://${PIPELINE_ARTIFACTS_BASE}/${KERNEL_ARCH}/ diff --git a/drivers/gpu/drm/ci/build.sh b/drivers/gpu/drm/ci/build.sh index 19fe01257ab9..6fb74c51abe2 100644 --- a/drivers/gpu/drm/ci/build.sh +++ b/drivers/gpu/drm/ci/build.sh @@ -98,14 +98,14 @@ done make ${KERNEL_IMAGE_NAME} -mkdir -p /lava-files/ +mkdir -p /kernel/ for image in ${KERNEL_IMAGE_NAME}; do - cp arch/${KERNEL_ARCH}/boot/${image} /lava-files/. + cp arch/${KERNEL_ARCH}/boot/${image} /kernel/. done if [[ -n ${DEVICE_TREES} ]]; then make dtbs - cp ${DEVICE_TREES} /lava-files/. + cp ${DEVICE_TREES} /kernel/. fi make modules @@ -121,11 +121,11 @@ if [[ ${DEBIAN_ARCH} = "arm64" ]]; then -d arch/arm64/boot/Image.lzma \ -C lzma\ -b arch/arm64/boot/dts/qcom/sdm845-cheza-r3.dtb \ - /lava-files/cheza-kernel + /kernel/cheza-kernel KERNEL_IMAGE_NAME+=" cheza-kernel" # Make a gzipped copy of the Image for db410c. - gzip -k /lava-files/Image + gzip -k /kernel/Image KERNEL_IMAGE_NAME+=" Image.gz" fi @@ -139,7 +139,7 @@ cp -rfv drivers/gpu/drm/ci/* install/. . .gitlab-ci/container/container_post_build.sh if [[ "$UPLOAD_TO_MINIO" = "1" ]]; then - xz -7 -c -T${FDO_CI_CONCURRENT:-4} vmlinux > /lava-files/vmlinux.xz + xz -7 -c -T${FDO_CI_CONCURRENT:-4} vmlinux > /kernel/vmlinux.xz FILES_TO_UPLOAD="$KERNEL_IMAGE_NAME vmlinux.xz" if [[ -n $DEVICE_TREES ]]; then @@ -148,13 +148,13 @@ if [[ "$UPLOAD_TO_MINIO" = "1" ]]; then ls -l "${S3_JWT_FILE}" for f in $FILES_TO_UPLOAD; do - ci-fairy s3cp --token-file "${S3_JWT_FILE}" /lava-files/$f \ - https://${PIPELINE_ARTIFACTS_BASE}/${DEBIAN_ARCH}/$f + s3_upload /kernel/$f \ + https://${PIPELINE_ARTIFACTS_BASE}/${DEBIAN_ARCH}/ done S3_ARTIFACT_NAME="kernel-files.tar.zst" tar --zstd -cf $S3_ARTIFACT_NAME install - ci-fairy s3cp --token-file "${S3_JWT_FILE}" ${S3_ARTIFACT_NAME} https://${PIPELINE_ARTIFACTS_BASE}/${DEBIAN_ARCH}/${S3_ARTIFACT_NAME} + s3_upload ${S3_ARTIFACT_NAME} https://${PIPELINE_ARTIFACTS_BASE}/${DEBIAN_ARCH}/ echo "Download vmlinux.xz from https://${PIPELINE_ARTIFACTS_BASE}/${DEBIAN_ARCH}/vmlinux.xz" fi @@ -165,7 +165,7 @@ ln -s common artifacts/install/ci-common cp .config artifacts/${CI_JOB_NAME}_config for image in ${KERNEL_IMAGE_NAME}; do - cp /lava-files/$image artifacts/install/. + cp /kernel/$image artifacts/install/. done tar -C artifacts -cf artifacts/install.tar install diff --git a/drivers/gpu/drm/ci/build.yml b/drivers/gpu/drm/ci/build.yml index 274f118533a7..8eb56ebcf4aa 100644 --- a/drivers/gpu/drm/ci/build.yml +++ b/drivers/gpu/drm/ci/build.yml @@ -67,7 +67,7 @@ testing:arm32: # # db410c and db820c don't boot with KASAN_INLINE, probably due to the kernel # becoming too big for their bootloaders. - ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT" + ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT DEBUG_WW_MUTEX_SLOWPATH" UPLOAD_TO_MINIO: 1 MERGE_FRAGMENT: arm.config @@ -79,7 +79,7 @@ testing:arm64: # # db410c and db820c don't boot with KASAN_INLINE, probably due to the kernel # becoming too big for their bootloaders. - ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT" + ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT DEBUG_WW_MUTEX_SLOWPATH" UPLOAD_TO_MINIO: 1 MERGE_FRAGMENT: arm64.config @@ -91,7 +91,7 @@ testing:x86_64: # # db410c and db820c don't boot with KASAN_INLINE, probably due to the kernel # becoming too big for their bootloaders. - ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT" + ENABLE_KCONFIGS: "PROVE_LOCKING DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT DEBUG_WW_MUTEX_SLOWPATH" UPLOAD_TO_MINIO: 1 MERGE_FRAGMENT: x86_64.config @@ -143,6 +143,10 @@ debian-arm64-release: rules: - when: never +debian-arm64-ubsan: + rules: + - when: never + debian-build-testing: rules: - when: never @@ -183,6 +187,10 @@ debian-testing-msan: rules: - when: never +debian-testing-ubsan: + rules: + - when: never + debian-vulkan: rules: - when: never diff --git a/drivers/gpu/drm/ci/container.yml b/drivers/gpu/drm/ci/container.yml index 07dc13ff865d..56c95c2f91ae 100644 --- a/drivers/gpu/drm/ci/container.yml +++ b/drivers/gpu/drm/ci/container.yml @@ -24,6 +24,18 @@ alpine/x86_64_build: rules: - when: never +debian/arm32_test-base: + rules: + - when: never + +debian/arm32_test-gl: + rules: + - when: never + +debian/arm32_test-vk: + rules: + - when: never + debian/arm64_test-gl: rules: - when: never @@ -32,6 +44,10 @@ debian/arm64_test-vk: rules: - when: never +debian/baremetal_arm32_test: + rules: + - when: never + debian/ppc64el_build: rules: - when: never @@ -40,6 +56,14 @@ debian/s390x_build: rules: - when: never +debian/x86_32_build: + rules: + - when: never + +debian/x86_64_test-android: + rules: + - when: never + debian/x86_64_test-vk: rules: - when: never diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index f04aabe8327c..ba75b3a7eca4 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -1,11 +1,11 @@ variables: DRM_CI_PROJECT_PATH: &drm-ci-project-path mesa/mesa - DRM_CI_COMMIT_SHA: &drm-ci-commit-sha 7d3062470f3ccc6cb40540e772e902c7e2248024 + DRM_CI_COMMIT_SHA: &drm-ci-commit-sha f73132f1215a37ce8ffc711a0136c90649aaf128 UPSTREAM_REPO: https://gitlab.freedesktop.org/drm/kernel.git TARGET_BRANCH: drm-next - IGT_VERSION: 33adea9ebafd059ac88a5ccfec60536394f36c7c + IGT_VERSION: 04bedb9238586b81d4d4ca62b02e584f6cfc77af DEQP_RUNNER_GIT_URL: https://gitlab.freedesktop.org/mesa/deqp-runner.git DEQP_RUNNER_GIT_TAG: v0.20.0 @@ -20,8 +20,10 @@ variables: rm download-git-cache.sh set +o xtrace S3_JWT_FILE: /s3_jwt + S3_JWT_HEADER_FILE: /s3_jwt_header S3_JWT_FILE_SCRIPT: |- echo -n '${S3_JWT}' > '${S3_JWT_FILE}' && + echo -n "Authorization: Bearer ${S3_JWT}" > '${S3_JWT_HEADER_FILE}' && unset CI_JOB_JWT S3_JWT # Unsetting vulnerable env variables S3_HOST: s3.freedesktop.org # This bucket is used to fetch the kernel image @@ -143,11 +145,11 @@ stages: # Pre-merge pipeline - if: &is-pre-merge $CI_PIPELINE_SOURCE == "merge_request_event" # Push to a branch on a fork - - if: &is-fork-push $CI_PROJECT_NAMESPACE != "mesa" && $CI_PIPELINE_SOURCE == "push" + - if: &is-fork-push $CI_PIPELINE_SOURCE == "push" # nightly pipeline - if: &is-scheduled-pipeline $CI_PIPELINE_SOURCE == "schedule" # pipeline for direct pushes that bypassed the CI - - if: &is-direct-push $CI_PROJECT_NAMESPACE == "mesa" && $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" + - if: &is-direct-push $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" # Rules applied to every job in the pipeline @@ -170,26 +172,15 @@ stages: - !reference [.disable-farm-mr-rules, rules] # Never run immediately after merging, as we just ran everything - !reference [.never-post-merge-rules, rules] - # Build everything in merge pipelines, if any files affecting the pipeline - # were changed + # Build everything in merge pipelines - if: *is-merge-attempt - changes: &all_paths - - drivers/gpu/drm/ci/**/* when: on_success # Same as above, but for pre-merge pipelines - if: *is-pre-merge - changes: - *all_paths when: manual - # Skip everything for pre-merge and merge pipelines which don't change - # anything in the build - - if: *is-merge-attempt - when: never - - if: *is-pre-merge - when: never # Build everything after someone bypassed the CI - if: *is-direct-push - when: on_success + when: manual # Build everything in scheduled pipelines - if: *is-scheduled-pipeline when: on_success @@ -198,6 +189,36 @@ stages: - when: manual +# Repeat of the above but with `when: on_success` replaced with +# `when: delayed` + `start_in:`, for build-only jobs. +# Note: make sure the branches in this list are the same as in +# `.container+build-rules` above. +.build-only-delayed-rules: + rules: + - !reference [.common-rules, rules] + # Run when re-enabling a disabled farm, but not when disabling it + - !reference [.disable-farm-mr-rules, rules] + # Never run immediately after merging, as we just ran everything + - !reference [.never-post-merge-rules, rules] + # Build everything in merge pipelines + - if: *is-merge-attempt + when: delayed + start_in: &build-delay 5 minutes + # Same as above, but for pre-merge pipelines + - if: *is-pre-merge + when: manual + # Build everything after someone bypassed the CI + - if: *is-direct-push + when: manual + # Build everything in scheduled pipelines + - if: *is-scheduled-pipeline + when: delayed + start_in: *build-delay + # Allow building everything in fork pipelines, but build nothing unless + # manually triggered + - when: manual + + .ci-deqp-artifacts: artifacts: name: "${CI_PROJECT_NAME}_${CI_JOB_NAME}" @@ -232,7 +253,7 @@ make git archive: - tar -cvzf ../$CI_PROJECT_NAME.tar.gz . # Use id_tokens for JWT auth - - ci-fairy s3cp --token-file "${S3_JWT_FILE}" ../$CI_PROJECT_NAME.tar.gz https://$S3_HOST/${S3_GITCACHE_BUCKET}/$CI_PROJECT_NAMESPACE/$CI_PROJECT_NAME/$CI_PROJECT_NAME.tar.gz + - s3_upload ../$CI_PROJECT_NAME.tar.gz https://$S3_HOST/${S3_GITCACHE_BUCKET}/$CI_PROJECT_NAMESPACE/$CI_PROJECT_NAME/ # Sanity checks of MR settings and commit logs diff --git a/drivers/gpu/drm/ci/igt_runner.sh b/drivers/gpu/drm/ci/igt_runner.sh index 68b042e43b7f..2a0599f12c58 100755 --- a/drivers/gpu/drm/ci/igt_runner.sh +++ b/drivers/gpu/drm/ci/igt_runner.sh @@ -85,5 +85,16 @@ deqp-runner junit \ --limit 50 \ --template "See $ARTIFACTS_BASE_URL/results/{{testcase}}.xml" +# Check if /proc/lockdep_stats exists +if [ -f /proc/lockdep_stats ]; then + # If debug_locks is 0, it indicates lockdep is detected and it turns itself off. + debug_locks=$(grep 'debug_locks:' /proc/lockdep_stats | awk '{print $2}') + if [ "$debug_locks" -eq 0 ] && [ "$ret" -eq 0 ]; then + echo "Warning: LOCKDEP issue detected. Please check dmesg logs for more information." + cat /proc/lockdep_stats + ret=101 + fi +fi + cd $oldpath exit $ret diff --git a/drivers/gpu/drm/ci/image-tags.yml b/drivers/gpu/drm/ci/image-tags.yml index 20049f3626b2..53fe34b86578 100644 --- a/drivers/gpu/drm/ci/image-tags.yml +++ b/drivers/gpu/drm/ci/image-tags.yml @@ -1,5 +1,5 @@ variables: - CONTAINER_TAG: "20250204-mesa-uprev" + CONTAINER_TAG: "20250328-mesa-uprev" DEBIAN_X86_64_BUILD_BASE_IMAGE: "debian/x86_64_build-base" DEBIAN_BASE_TAG: "${CONTAINER_TAG}" @@ -20,3 +20,5 @@ variables: DEBIAN_PYUTILS_TAG: "${CONTAINER_TAG}" ALPINE_X86_64_LAVA_SSH_TAG: "${CONTAINER_TAG}" + + CONDITIONAL_BUILD_ANGLE_TAG: fec96cc945650c5fe9f7188cabe80d8a diff --git a/drivers/gpu/drm/ci/lava-submit.sh b/drivers/gpu/drm/ci/lava-submit.sh index 6e5ac51e8c0a..a1e8b34fb2d4 100755 --- a/drivers/gpu/drm/ci/lava-submit.sh +++ b/drivers/gpu/drm/ci/lava-submit.sh @@ -48,12 +48,13 @@ ROOTFS_URL="$(get_path_to_artifact lava-rootfs.tar.zst)" rm -rf results mkdir -p results/job-rootfs-overlay/ -artifacts/ci-common/generate-env.sh > results/job-rootfs-overlay/set-job-env-vars.sh +artifacts/ci-common/export-gitlab-job-env-for-dut.sh \ + > results/job-rootfs-overlay/set-job-env-vars.sh cp artifacts/ci-common/init-*.sh results/job-rootfs-overlay/ cp "$SCRIPTS_DIR"/setup-test-env.sh results/job-rootfs-overlay/ tar zcf job-rootfs-overlay.tar.gz -C results/job-rootfs-overlay/ . -ci-fairy s3cp --token-file "${S3_JWT_FILE}" job-rootfs-overlay.tar.gz "https://${JOB_ROOTFS_OVERLAY_PATH}" +s3_upload job-rootfs-overlay.tar.gz "https://${JOB_ARTIFACTS_BASE}" # Prepare env vars for upload. section_switch variables "Environment variables passed through to device:" diff --git a/drivers/gpu/drm/ci/test.yml b/drivers/gpu/drm/ci/test.yml index 6a1e059858e5..84a25f0e783b 100644 --- a/drivers/gpu/drm/ci/test.yml +++ b/drivers/gpu/drm/ci/test.yml @@ -1,6 +1,14 @@ +.allow_failure_lockdep: + variables: + FF_USE_NEW_BASH_EVAL_STRATEGY: 'true' + allow_failure: + exit_codes: + - 101 + .lava-test: extends: - .container+build-rules + - .allow_failure_lockdep timeout: "1h30m" rules: - !reference [.scheduled_pipeline-rules, rules] @@ -69,6 +77,7 @@ extends: - .baremetal-test-arm64 - .use-debian/baremetal_arm64_test + - .allow_failure_lockdep timeout: "1h30m" rules: - !reference [.scheduled_pipeline-rules, rules] @@ -89,6 +98,28 @@ tags: - $RUNNER_TAG +.software-driver: + stage: software-driver + extends: + - .allow_failure_lockdep + timeout: "1h30m" + rules: + - !reference [.scheduled_pipeline-rules, rules] + - when: on_success + extends: + - .test-gl + tags: + - kvm + script: + - ln -sf $CI_PROJECT_DIR/install /install + - mv install/bzImage /kernel/bzImage + - mkdir -p /lib/modules + - install/crosvm-runner.sh install/igt_runner.sh + needs: + - debian/x86_64_test-gl + - testing:x86_64 + - igt:x86_64 + .msm-sc7180: extends: - .lava-igt:arm64 @@ -133,7 +164,7 @@ msm:apq8016: BM_KERNEL_EXTRA_ARGS: clk_ignore_unused RUNNER_TAG: google-freedreno-db410c script: - - ./install/bare-metal/fastboot.sh + - ./install/bare-metal/fastboot.sh || exit $? msm:apq8096: extends: @@ -147,7 +178,7 @@ msm:apq8096: GPU_VERSION: apq8096 RUNNER_TAG: google-freedreno-db820c script: - - ./install/bare-metal/fastboot.sh + - ./install/bare-metal/fastboot.sh || exit $? msm:sdm845: extends: @@ -161,7 +192,7 @@ msm:sdm845: GPU_VERSION: sdm845 RUNNER_TAG: google-freedreno-cheza script: - - ./install/bare-metal/cros-servo.sh + - ./install/bare-metal/cros-servo.sh || exit $? msm:sm8350-hdk: extends: @@ -440,47 +471,16 @@ panfrost:g12b: - .panfrost-gpu virtio_gpu:none: - stage: software-driver - timeout: "1h30m" - rules: - - !reference [.scheduled_pipeline-rules, rules] - - when: on_success + extends: + - .software-driver variables: CROSVM_GALLIUM_DRIVER: llvmpipe DRIVER_NAME: virtio_gpu GPU_VERSION: none - extends: - - .test-gl - tags: - - kvm - script: - - ln -sf $CI_PROJECT_DIR/install /install - - mv install/bzImage /lava-files/bzImage - - install/crosvm-runner.sh install/igt_runner.sh - needs: - - debian/x86_64_test-gl - - testing:x86_64 - - igt:x86_64 vkms:none: - stage: software-driver - timeout: "1h30m" - rules: - - !reference [.scheduled_pipeline-rules, rules] - - when: on_success + extends: + - .software-driver variables: DRIVER_NAME: vkms GPU_VERSION: none - extends: - - .test-gl - tags: - - kvm - script: - - ln -sf $CI_PROJECT_DIR/install /install - - mv install/bzImage /lava-files/bzImage - - mkdir -p /lib/modules - - ./install/crosvm-runner.sh ./install/igt_runner.sh - needs: - - debian/x86_64_test-gl - - testing:x86_64 - - igt:x86_64 diff --git a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt index 75374085f40f..f44dbce3151a 100644 --- a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt +++ b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt @@ -14,16 +14,10 @@ amdgpu/amd_plane@mpo-scale-nv12,Fail amdgpu/amd_plane@mpo-scale-p010,Fail amdgpu/amd_plane@mpo-scale-rgb,Crash amdgpu/amd_plane@mpo-swizzle-toggle,Fail -amdgpu/amd_uvd_dec@amdgpu_uvd_decode,Crash +amdgpu/amd_uvd_dec@amdgpu_uvd_decode,Fail kms_addfb_basic@bad-pitch-65536,Fail kms_addfb_basic@bo-too-small,Fail kms_addfb_basic@too-high,Fail -kms_async_flips@alternate-sync-async-flip,Fail -kms_async_flips@alternate-sync-async-flip-atomic,Fail -kms_async_flips@test-cursor,Fail -kms_async_flips@test-cursor-atomic,Fail -kms_async_flips@test-time-stamp,Fail -kms_async_flips@test-time-stamp-atomic,Fail kms_atomic_transition@plane-all-modeset-transition-internal-panels,Fail kms_atomic_transition@plane-all-transition,Fail kms_atomic_transition@plane-all-transition-nonblocking,Fail diff --git a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt index 3879c4812a22..902d54027506 100644 --- a/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt +++ b/drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt @@ -14,6 +14,7 @@ gem_.* i915_.* xe_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/i915-amly-fails.txt b/drivers/gpu/drm/ci/xfails/i915-amly-fails.txt index a29cea4f234c..8e2b5504004e 100644 --- a/drivers/gpu/drm/ci/xfails/i915-amly-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-amly-fails.txt @@ -1,22 +1,18 @@ -core_setmaster@master-drop-set-shared-fd,Fail +core_setmaster_vs_auth,Fail i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail i915_module_load@resize-bar,Fail i915_pm_rpm@gem-execbuf-stress,Timeout i915_pm_rpm@module-reload,Fail -kms_async_flips@test-time-stamp,Timeout -kms_ccs@crc-sprite-planes-basic-y-tiled-ccs,Timeout -kms_flip@dpms-off-confusion-interruptible,Timeout -kms_flip@wf_vblank-ts-check-interruptible,Fail +kms_ccs@ccs-on-another-bo-y-tiled-gen12-rc-ccs-cc,Timeout +kms_fb_coherency@memset-crc,Crash kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-upscaling,Fail -kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling,Fail -kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-16bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-16bpp-linear-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-downscaling,Fail @@ -31,12 +27,18 @@ kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs-downscaling,Fail +kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-cpu,Timeout kms_lease@lease-uevent,Fail -kms_lease@page-flip-implicit-plane,Timeout kms_plane_alpha_blend@alpha-basic,Fail kms_plane_alpha_blend@alpha-opaque-fb,Fail kms_plane_alpha_blend@alpha-transparent-fb,Fail kms_plane_alpha_blend@constant-alpha-max,Fail +kms_plane_scaling@planes-upscale-factor-0-25,Timeout +kms_pm_backlight@brightness-with-dpms,Crash +kms_pm_backlight@fade,Crash +kms_prop_blob@invalid-set-prop-any,Fail +kms_properties@connector-properties-legacy,Timeout +kms_universal_plane@disable-primary-vs-flip,Timeout perf@i915-ref-count,Fail perf_pmu@module-unload,Fail perf_pmu@rc6,Crash @@ -44,8 +46,3 @@ sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt b/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt index 2ef1dc35a7fa..922327632eff 100644 --- a/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-amly-skips.txt @@ -11,6 +11,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-apl-fails.txt b/drivers/gpu/drm/ci/xfails/i915-apl-fails.txt index ee11999e3da1..7353ab11e940 100644 --- a/drivers/gpu/drm/ci/xfails/i915-apl-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-apl-fails.txt @@ -1,4 +1,3 @@ -core_setmaster@master-drop-set-user,Fail i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail @@ -16,6 +15,7 @@ kms_flip_scaled_crc@flip-64bpp-linear-to-16bpp-linear-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-16bpp-xtile-downscaling,Fail +kms_flip_scaled_crc@flip-64bpp-xtile-to-16bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-32bpp-xtile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-32bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-downscaling,Fail @@ -30,7 +30,6 @@ kms_plane_alpha_blend@alpha-opaque-fb,Fail kms_plane_alpha_blend@alpha-transparent-fb,Fail kms_plane_alpha_blend@constant-alpha-max,Fail kms_pm_backlight@basic-brightness,Fail -kms_pm_backlight@brightness-with-dpms,Crash kms_pm_backlight@fade,Fail kms_pm_backlight@fade-with-dpms,Fail kms_pm_rpm@modeset-stress-extra-wait,Timeout @@ -43,8 +42,3 @@ sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt b/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt index 4f50e0240ff4..80bf2741866c 100644 --- a/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-apl-skips.txt @@ -13,6 +13,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-cml-fails.txt b/drivers/gpu/drm/ci/xfails/i915-cml-fails.txt index 47b3f1d42bb6..6fef7c1e56ea 100644 --- a/drivers/gpu/drm/ci/xfails/i915-cml-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-cml-fails.txt @@ -1,4 +1,4 @@ -core_setmaster@master-drop-set-shared-fd,Fail +core_setmaster_vs_auth,Fail i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail @@ -8,8 +8,9 @@ i915_pipe_stress@stress-xrgb8888-ytiled,Fail i915_pm_rpm@gem-execbuf-stress,Timeout i915_pm_rpm@module-reload,Fail i915_pm_rpm@system-suspend-execbuf,Timeout -kms_async_flips@test-time-stamp,Timeout -kms_ccs@crc-sprite-planes-basic-y-tiled-ccs,Timeout +kms_ccs@ccs-on-another-bo-y-tiled-gen12-rc-ccs-cc,Timeout +kms_cursor_crc@cursor-suspend,Timeout +kms_fb_coherency@memset-crc,Crash kms_flip@busy-flip,Timeout kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-upscaling,Fail @@ -34,17 +35,22 @@ kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs-downscaling,Fail kms_lease@lease-uevent,Fail -kms_lease@page-flip-implicit-plane,Timeout +kms_pipe_stress@stress-xrgb8888-untiled,Fail +kms_pipe_stress@stress-xrgb8888-ytiled,Fail kms_plane_alpha_blend@alpha-basic,Fail kms_plane_alpha_blend@alpha-opaque-fb,Fail kms_plane_alpha_blend@alpha-transparent-fb,Fail kms_plane_alpha_blend@constant-alpha-max,Fail +kms_plane_scaling@planes-upscale-factor-0-25,Timeout +kms_pm_backlight@brightness-with-dpms,Crash +kms_pm_backlight@fade,Crash +kms_prop_blob@invalid-set-prop-any,Fail +kms_properties@connector-properties-legacy,Timeout kms_psr2_sf@cursor-plane-update-sf,Fail kms_psr2_sf@overlay-plane-update-continuous-sf,Fail kms_psr2_sf@overlay-plane-update-sf-dmg-area,Fail kms_psr2_sf@overlay-primary-update-sf-dmg-area,Fail kms_psr2_sf@plane-move-sf-dmg-area,Fail -kms_psr2_sf@pr-cursor-plane-update-sf,Timeout kms_psr2_sf@primary-plane-update-sf-dmg-area,Fail kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb,Fail kms_psr2_sf@psr2-cursor-plane-update-sf,Fail @@ -57,6 +63,7 @@ kms_psr2_sf@psr2-primary-plane-update-sf-dmg-area-big-fb,Fail kms_psr2_su@page_flip-NV12,Fail kms_psr2_su@page_flip-P010,Fail kms_setmode@basic,Fail +kms_universal_plane@disable-primary-vs-flip,Timeout perf@i915-ref-count,Fail perf_pmu@module-unload,Fail perf_pmu@rc6,Crash @@ -65,6 +72,3 @@ sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt b/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt index c87ff8b40e99..c393a138b8a6 100644 --- a/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-cml-skips.txt @@ -9,6 +9,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* @@ -16,7 +17,6 @@ gem_.* # Hangs the machine and timeout occurs i915_pm_rc6_residency.* i915_suspend.* -xe_module_load.* api_intel_allocator.* kms_cursor_legacy.* i915_pm_rpm.* diff --git a/drivers/gpu/drm/ci/xfails/i915-glk-fails.txt b/drivers/gpu/drm/ci/xfails/i915-glk-fails.txt index 843c363b42f5..8adf5f0a6e80 100644 --- a/drivers/gpu/drm/ci/xfails/i915-glk-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-glk-fails.txt @@ -1,39 +1,47 @@ -core_setmaster@master-drop-set-shared-fd,Fail -core_setmaster@master-drop-set-user,Fail +core_setmaster_vs_auth,Fail gen9_exec_parse@unaligned-access,Fail i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail i915_module_load@resize-bar,Fail -kms_dirtyfb@default-dirtyfb-ioctl,Fail kms_dirtyfb@drrs-dirtyfb-ioctl,Fail -kms_dirtyfb@fbc-dirtyfb-ioctl,Fail +kms_flip@blocking-wf_vblank,Fail +kms_flip@wf_vblank-ts-check,Fail kms_flip@wf_vblank-ts-check-interruptible,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-downscaling,Fail -kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-downscaling,Fail +kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-upscaling,Fail +kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling,Fail -kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-16bpp-linear-downscaling,Fail +kms_flip_scaled_crc@flip-64bpp-linear-to-16bpp-linear-upscaling,Fail +kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-linear-to-32bpp-linear-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-16bpp-xtile-downscaling,Fail +kms_flip_scaled_crc@flip-64bpp-xtile-to-16bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-32bpp-xtile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-xtile-to-32bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-downscaling,Fail +kms_flip_scaled_crc@flip-64bpp-ytile-to-16bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling,Fail -kms_frontbuffer_tracking@fbcdrrs-tiling-linear,Fail +kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs-downscaling,Fail +kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-cpu,Timeout +kms_frontbuffer_tracking@fbc-tiling-linear,Fail kms_lease@lease-uevent,Fail kms_plane_alpha_blend@alpha-opaque-fb,Fail +kms_plane_scaling@planes-upscale-factor-0-25,Timeout +kms_pm_backlight@brightness-with-dpms,Crash +kms_pm_backlight@fade,Crash +kms_prop_blob@invalid-set-prop-any,Fail +kms_properties@connector-properties-legacy,Timeout +kms_rotation_crc@multiplane-rotation,Fail kms_rotation_crc@multiplane-rotation-cropping-top,Fail +kms_universal_plane@disable-primary-vs-flip,Timeout perf@non-zero-reason,Timeout sysfs_heartbeat_interval@long,Timeout +sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt b/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt index 219ae839323a..2e4ef9f35654 100644 --- a/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-glk-skips.txt @@ -12,6 +12,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt b/drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt index 0e08fff741aa..57453e340040 100644 --- a/drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt @@ -3,12 +3,13 @@ i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail i915_module_load@resize-bar,Fail +i915_pm_rpm@gem-execbuf-stress,Timeout kms_flip@dpms-off-confusion,Fail +kms_flip@nonexisting-fb,Fail kms_flip@single-buffer-flip-vs-dpms-off-vs-modeset,Fail -kms_flip@single-buffer-flip-vs-dpms-off-vs-modeset-interruptible,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-upscaling,Fail -kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-downscaling,Fail +kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-downscaling,UnexpectedImprovement(Skip) kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-upscaling,Fail @@ -28,7 +29,6 @@ kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs-downscaling,Fail -kms_frontbuffer_tracking@fbc-rgb565-draw-blt,Timeout kms_lease@lease-uevent,Fail kms_pm_rpm@modeset-stress-extra-wait,Timeout kms_rotation_crc@bad-pixel-format,Fail @@ -37,13 +37,10 @@ kms_rotation_crc@multiplane-rotation-cropping-bottom,Fail kms_rotation_crc@multiplane-rotation-cropping-top,Fail perf@i915-ref-count,Fail perf_pmu@module-unload,Fail +perf_pmu@most-busy-idle-check-all,Fail perf_pmu@rc6,Crash +prime_busy@before-wait,Fail sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt b/drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt index 1a3d87c0ca6e..8dec57da1bb3 100644 --- a/drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt @@ -9,6 +9,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt b/drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt index d4fba4f55ec1..117098bc95d9 100644 --- a/drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt @@ -21,8 +21,3 @@ sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt b/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt index dc722d6a774e..e287462a491a 100644 --- a/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt @@ -12,6 +12,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt b/drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt index 93d42b146df9..462c050a8b2d 100644 --- a/drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt @@ -1,13 +1,14 @@ api_intel_allocator@reopen,Timeout api_intel_bb@destroy-bb,Timeout core_hotunplug@hotrebind-lateclose,Timeout -drm_read@short-buffer-block,Timeout dumb_buffer@map-valid,Timeout i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail i915_module_load@resize-bar,Fail +i915_pm_rpm@gem-execbuf-stress,Timeout i915_pm_rps@engine-order,Timeout +i915_pm_rps@waitboost,Fail kms_lease@lease-uevent,Fail kms_rotation_crc@multiplane-rotation,Fail perf@i915-ref-count,Fail @@ -16,6 +17,7 @@ perf_pmu@enable-race,Timeout perf_pmu@module-unload,Fail perf_pmu@rc6,Crash perf_pmu@semaphore-wait-idle,Timeout +prime_busy@before,Fail prime_mmap@test_refcounting,Timeout sriov_basic@enable-vfs-bind-unbind-each-numvfs-all,Timeout syncobj_basic@illegal-fd-to-handle,Timeout @@ -26,8 +28,3 @@ syncobj_wait@multi-wait-all-submitted,Timeout syncobj_wait@multi-wait-for-submit-submitted-signaled,Timeout syncobj_wait@wait-any-complex,Timeout syncobj_wait@wait-delayed-signal,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt b/drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt index 938377896841..429dc3c731df 100644 --- a/drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt @@ -18,6 +18,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/i915-whl-fails.txt b/drivers/gpu/drm/ci/xfails/i915-whl-fails.txt index 1cb6978c86dc..0f167cfd503c 100644 --- a/drivers/gpu/drm/ci/xfails/i915-whl-fails.txt +++ b/drivers/gpu/drm/ci/xfails/i915-whl-fails.txt @@ -1,4 +1,4 @@ -core_setmaster@master-drop-set-shared-fd,Fail +core_setmaster_vs_auth,Fail i915_module_load@load,Fail i915_module_load@reload,Fail i915_module_load@reload-no-display,Fail @@ -6,10 +6,9 @@ i915_module_load@resize-bar,Fail i915_pm_rpm@gem-execbuf-stress,Timeout i915_pm_rpm@module-reload,Fail i915_pm_rpm@system-suspend-execbuf,Timeout -kms_async_flips@test-time-stamp,Timeout -kms_ccs@crc-sprite-planes-basic-y-tiled-ccs,Timeout -kms_dirtyfb@default-dirtyfb-ioctl,Fail -kms_dirtyfb@fbc-dirtyfb-ioctl,Fail +kms_ccs@ccs-on-another-bo-y-tiled-gen12-rc-ccs-cc,Timeout +kms_cursor_crc@cursor-suspend,Timeout +kms_fb_coherency@memset-crc,Crash kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-downscaling,Fail kms_flip_scaled_crc@flip-32bpp-linear-to-64bpp-linear-upscaling,Fail kms_flip_scaled_crc@flip-32bpp-xtile-to-64bpp-xtile-downscaling,Fail @@ -30,13 +29,19 @@ kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-downscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling,Fail kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs-downscaling,Fail +kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-cpu,Timeout kms_frontbuffer_tracking@fbc-tiling-linear,Fail kms_lease@lease-uevent,Fail -kms_lease@page-flip-implicit-plane,Timeout kms_plane_alpha_blend@alpha-basic,Fail kms_plane_alpha_blend@alpha-opaque-fb,Fail kms_plane_alpha_blend@alpha-transparent-fb,Fail kms_plane_alpha_blend@constant-alpha-max,Fail +kms_plane_scaling@planes-upscale-factor-0-25,Timeout +kms_pm_backlight@brightness-with-dpms,Crash +kms_pm_backlight@fade,Crash +kms_prop_blob@invalid-set-prop-any,Fail +kms_properties@connector-properties-legacy,Timeout +kms_universal_plane@disable-primary-vs-flip,Timeout perf@i915-ref-count,Fail perf_pmu@module-unload,Fail perf_pmu@rc6,Crash @@ -45,8 +50,3 @@ sysfs_heartbeat_interval@long,Timeout sysfs_heartbeat_interval@off,Timeout sysfs_preempt_timeout@off,Timeout sysfs_timeslice_duration@off,Timeout -xe_module_load@force-load,Fail -xe_module_load@load,Fail -xe_module_load@many-reload,Fail -xe_module_load@reload,Fail -xe_module_load@reload-no-display,Fail diff --git a/drivers/gpu/drm/ci/xfails/i915-whl-skips.txt b/drivers/gpu/drm/ci/xfails/i915-whl-skips.txt index 29bff8922ae1..7e7374ebf3d1 100644 --- a/drivers/gpu/drm/ci/xfails/i915-whl-skips.txt +++ b/drivers/gpu/drm/ci/xfails/i915-whl-skips.txt @@ -9,6 +9,7 @@ nouveau_.* ^v3d.* ^vc4.* ^vmwgfx* +^xe.* # GEM tests takes ~1000 hours, so skip it gem_.* diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt index 4f176c04ec4e..592d7d69e6fc 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt @@ -17,8 +17,28 @@ kms_bw@linear-tiling-2-displays-3840x2160p,Fail kms_color@invalid-gamma-lut-sizes,Fail kms_cursor_legacy@cursor-vs-flip-atomic,Fail kms_cursor_legacy@cursor-vs-flip-legacy,Fail +kms_cursor_legacy@flip-vs-cursor-atomic,Fail +kms_cursor_legacy@flip-vs-cursor-legacy,Fail +kms_cursor_legacy@flip-vs-cursor-toggle,Fail +kms_cursor_legacy@flip-vs-cursor-varying-size,Fail +kms_flip@basic-plain-flip,Fail +kms_flip@dpms-off-confusion,Fail +kms_flip@dpms-off-confusion-interruptible,Fail +kms_flip@flip-vs-absolute-wf_vblank,Fail +kms_flip@flip-vs-absolute-wf_vblank-interruptible,Fail +kms_flip@flip-vs-blocking-wf-vblank,Fail +kms_flip@flip-vs-expired-vblank,Fail +kms_flip@flip-vs-expired-vblank-interruptible,Fail kms_flip@flip-vs-modeset-vs-hang,Fail +kms_flip@flip-vs-panning,Fail +kms_flip@flip-vs-panning-interruptible,Fail kms_flip@flip-vs-panning-vs-hang,Fail kms_flip@flip-vs-suspend,Fail kms_flip@flip-vs-suspend-interruptible,Fail +kms_flip@plain-flip-fb-recreate,Fail +kms_flip@plain-flip-fb-recreate-interruptible,Fail +kms_flip@plain-flip-interruptible,Fail +kms_flip@plain-flip-ts-check,Fail +kms_flip@plain-flip-ts-check-interruptible,Fail +kms_invalid_mode@overflow-vrefresh,Fail kms_lease@lease-uevent,Fail diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt index 2956567c3048..443596d9e662 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt @@ -46,3 +46,10 @@ kms_prop_blob@invalid-set-prop # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_bw@connected-linear-tiling-1-displays-2160x1440p + +# Board Name: mt8173-elm-hana +# Bug Report: https://lore.kernel.org/linux-mediatek/d25442b9-0b6b-433c-8e23-997840fad305@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_flip@flip-vs-wf_vblank-interruptible diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt index d0db51874aef..b5ee7323a160 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt @@ -11,6 +11,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt index 5a063361d7f2..184d0cccc318 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt @@ -1,12 +1,38 @@ -core_setmaster@master-drop-set-user,Fail dumb_buffer@create-clear,Crash kms_bw@connected-linear-tiling-1-displays-1920x1080p,Fail +kms_bw@connected-linear-tiling-1-displays-2160x1440p,Fail kms_bw@connected-linear-tiling-1-displays-2560x1440p,Fail kms_bw@connected-linear-tiling-1-displays-3840x2160p,Fail kms_bw@linear-tiling-1-displays-1920x1080p,Fail kms_bw@linear-tiling-1-displays-2160x1440p,Fail kms_bw@linear-tiling-1-displays-3840x2160p,Fail +kms_color@invalid-gamma-lut-sizes,Fail kms_cursor_legacy@cursor-vs-flip-atomic,Fail +kms_cursor_legacy@cursor-vs-flip-legacy,Fail +kms_cursor_legacy@flip-vs-cursor-atomic,Fail +kms_cursor_legacy@flip-vs-cursor-legacy,Fail +kms_cursor_legacy@flip-vs-cursor-toggle,Fail +kms_cursor_legacy@flip-vs-cursor-varying-size,Fail +kms_flip@basic-flip-vs-wf_vblank,Fail +kms_flip@basic-plain-flip,Fail +kms_flip@dpms-off-confusion,Fail +kms_flip@dpms-off-confusion-interruptible,Fail +kms_flip@flip-vs-absolute-wf_vblank,Fail +kms_flip@flip-vs-absolute-wf_vblank-interruptible,Fail +kms_flip@flip-vs-blocking-wf-vblank,Fail +kms_flip@flip-vs-expired-vblank,Fail +kms_flip@flip-vs-expired-vblank-interruptible,Fail +kms_flip@flip-vs-modeset-vs-hang,Fail +kms_flip@flip-vs-panning,Fail +kms_flip@flip-vs-panning-interruptible,Fail kms_flip@flip-vs-panning-vs-hang,Fail kms_flip@flip-vs-suspend,Fail +kms_flip@flip-vs-suspend-interruptible,Fail +kms_flip@flip-vs-wf_vblank-interruptible,Fail +kms_flip@plain-flip-fb-recreate,Fail +kms_flip@plain-flip-fb-recreate-interruptible,Fail +kms_flip@plain-flip-interruptible,Fail +kms_flip@plain-flip-ts-check,Fail +kms_flip@plain-flip-ts-check-interruptible,Fail +kms_invalid_mode@overflow-vrefresh,Fail kms_lease@lease-uevent,Fail diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt index df7e5ce7a036..0c67fec92450 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt @@ -18,3 +18,24 @@ kms_cursor_legacy@cursor-vs-flip-atomic-transitions # IGT Version: 1.28-gf13702b8e # Linux Version: 6.10.0-rc5 fbdev@write + +# Board Name: mt8183-kukui-jacuzzi-juniper-sku16 +# Bug Report: https://lore.kernel.org/linux-mediatek/a520d1d6-95b3-4573-b8f2-689f05bc2230@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_flip@basic-flip-vs-modeset + +# Board Name: mt8183-kukui-jacuzzi-juniper-sku16 +# Bug Report: https://lore.kernel.org/linux-mediatek/ca960a82-00fc-4183-b983-998f7ac2fbb5@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_atomic_transition@plane-all-modeset-transition-internal-panels + +# Board Name: mt8183-kukui-jacuzzi-juniper-sku16 +# Bug Report: https://lore.kernel.org/linux-mediatek/da578eed-224f-4374-853a-1ff0aa20d03b@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_atomic_transition@plane-toggle-modeset-transition diff --git a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt index d0db51874aef..b5ee7323a160 100644 --- a/drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt +++ b/drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt @@ -11,6 +11,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt b/drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt index 8198e06344a3..9fd44a4b962a 100644 --- a/drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt +++ b/drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt @@ -11,6 +11,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt index 7752adff05c1..72c469021b66 100644 --- a/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt +++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt @@ -1,8 +1,4 @@ kms_3d,Fail -kms_cursor_legacy@forked-bo,Fail -kms_cursor_legacy@forked-move,Fail -kms_cursor_legacy@single-bo,Fail -kms_cursor_legacy@torture-bo,Fail kms_force_connector_basic@force-edid,Fail kms_hdmi_inject@inject-4k,Fail kms_lease@lease-uevent,Fail diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt b/drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt index 1674c8e214d6..87724413174c 100644 --- a/drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt @@ -10,6 +10,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt b/drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt index 5550be5486ed..a4d2f2a7963a 100644 --- a/drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt @@ -13,6 +13,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt index 8910afb6acf2..d270af1cca52 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt @@ -32,3 +32,10 @@ kms_lease@page-flip-implicit-plane # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_plane@plane-position-hole-dpms + +# Board Name: sc7180-trogdor-kingoftown +# Bug Report: https://gitlab.freedesktop.org/drm/msm/-/issues/73 +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_plane@plane-position-covered diff --git a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt index 478d7c161616..d4b8ba3a54a9 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt @@ -13,6 +13,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. @@ -28,3 +29,6 @@ kms_cursor_crc@cursor-random-max-size # https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/162 kms_display_modes@extended-mode-basic kms_display_modes@mst-extended-mode-negative + +# It causes other tests to fail, so skip it. +kms_invalid_mode@overflow-vrefresh diff --git a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt index cd3d3b0befe4..cafc802cecea 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt @@ -11,3 +11,10 @@ msm/msm_mapping@shadow # IGT Version: 1.28-gf13702b8e # Linux Version: 6.10.0-rc5 kms_lease@page-flip-implicit-plane + +# Board Name: sc7180-trogdor-lazor-limozeen-nots-r5 +# Bug Report: https://gitlab.freedesktop.org/drm/msm/-/issues/74 +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_cursor_crc@cursor-random-128x128 diff --git a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt index ef9318afcd89..022db559cc7d 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt @@ -13,6 +13,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/msm-sdm845-flakes.txt b/drivers/gpu/drm/ci/xfails/msm-sdm845-flakes.txt index 38ec0305c1f4..e32d73c6c98e 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sdm845-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sdm845-flakes.txt @@ -130,3 +130,10 @@ kms_lease@page-flip-implicit-plane # IGT Version: 1.28-ga73311079 # Linux Version: 6.11.0-rc5 kms_flip@flip-vs-expired-vblank + +# Board Name: sdm845-cheza-r3 +# Bug Report: https://gitlab.freedesktop.org/drm/msm/-/issues/75 +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_flip@plain-flip-ts-check-interruptible diff --git a/drivers/gpu/drm/ci/xfails/msm-sdm845-skips.txt b/drivers/gpu/drm/ci/xfails/msm-sdm845-skips.txt index 2ce7f7e23a01..6c86d1953e11 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sdm845-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sdm845-skips.txt @@ -18,6 +18,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. @@ -35,3 +36,315 @@ kms_content_protection@uevent # https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/162 kms_display_modes@extended-mode-basic kms_display_modes@mst-extended-mode-negative + +# Kernel panic +msm/msm_recovery@hangcheck +# DEBUG - Begin test msm/msm_recovery@hangcheck +# Console: switching to colour dummy device 80x25 +# [ 489.526286] [IGT] msm_recovery: executing +# [ 489.531926] [IGT] msm_recovery: starting subtest hangcheck +# [ 492.808574] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 492.820358] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 492.831154] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 493.832570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 493.844177] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 493.854971] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 494.824633] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 494.836237] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 494.847034] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 495.816570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 495.828170] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 495.838966] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 496.804643] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 496.816246] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 496.827041] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 497.832570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 497.844170] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 497.854963] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 498.820636] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 498.832232] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 498.843024] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 499.816568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 499.828163] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 499.838958] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 500.808570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 500.820165] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 500.830960] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 501.832570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 501.844175] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 501.854965] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 502.824568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 502.836171] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 502.846965] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 503.816570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 503.828176] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 503.838969] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 504.804640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 504.816237] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 504.827033] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 505.828643] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 505.840247] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 505.851043] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 506.820637] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 506.832233] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 506.843026] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 507.816567] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 507.828171] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 507.838965] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 508.808568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 508.820173] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 508.830969] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 509.832568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 509.844173] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 509.854967] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 510.824568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 510.836162] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 510.846954] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 511.816569] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 511.828173] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 511.838968] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 512.804641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 512.816246] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 512.827040] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 513.828641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 513.840239] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 513.851035] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 514.824568] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 514.836164] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 514.846959] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 515.812640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 515.824235] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 515.835030] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 515.912427] rcu: INFO: rcu_preempt self-detected stall on CPU +# [ 515.918398] rcu: 0-....: (6452 ticks this GP) idle=6afc/1/0x4000000000000000 softirq=12492/12697 fqs=3179 +# [ 515.929296] rcu: (t=6505 jiffies g=36205 q=58 ncpus=8) +# [ 515.934709] CPU: 0 UID: 0 PID: 126 Comm: sugov:0 Tainted: G W 6.14.0-rc4-gdddf15cff632 #1 +# [ 515.934727] Tainted: [W]=WARN +# [ 515.934732] Hardware name: Google Cheza (rev3+) (DT) +# [ 515.934739] pstate: 00400009 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +# [ 515.934751] pc : rcu_core+0x59c/0xe68 +# [ 515.934769] lr : rcu_core+0x74/0xe68 +# [ 515.934781] sp : ffff800080003e50 +# [ 515.934785] x29: ffff800080003e50 x28: ffff225d038e9bc0 x27: 0000000000000002 +# [ 515.934805] x26: ffffc171a8ee6108 x25: ffffc171a85bc2c0 x24: ffff60ecd691e000 +# [ 515.934820] x23: ffffc171a85d15c0 x22: ffffc171a8f8d780 x21: ffff225e7eeef5c0 +# [ 515.934835] x20: ffffc171a8ef0e80 x19: ffffc171a85d15d1 x18: ffffc171a9461e70 +# [ 515.934850] x17: ffff60ecd691e000 x16: ffff800080000000 x15: 0000000000000000 +# [ 515.934866] x14: ffffc171a85d0780 x13: 0000000000000400 x12: 0000000000000000 +# [ 515.934880] x11: ffffc171a85ce900 x10: ffffc171a8ef5000 x9 : ffffc171a8ef0000 +# [ 515.934894] x8 : ffff800080003d88 x7 : ffffc171a8ee6100 x6 : ffff800080003de0 +# [ 515.934909] x5 : ffff800080003dc8 x4 : 0000000000000003 x3 : 0000000000000000 +# [ 515.934923] x2 : 0000000000000101 x1 : 0000000000000000 x0 : ffff225d038e9bc0 +# [ 515.934939] Call trace: +# [ 515.934945] rcu_core+0x59c/0xe68 (P) +# [ 515.934962] rcu_core_si+0x10/0x1c +# [ 515.934976] handle_softirqs+0x118/0x4b8 +# [ 515.934994] __do_softirq+0x14/0x20 +# [ 515.935007] ____do_softirq+0x10/0x1c +# [ 515.935021] call_on_irq_stack+0x24/0x4c +# [ 515.935034] do_softirq_own_stack+0x1c/0x28 +# [ 515.935048] __irq_exit_rcu+0x174/0x1b4 +# [ 515.935063] irq_exit_rcu+0x10/0x38 +# [ 515.935077] el1_interrupt+0x38/0x64 +# [ 515.935092] el1h_64_irq_handler+0x18/0x24 +# [ 515.935104] el1h_64_irq+0x6c/0x70 +# [ 515.935115] lock_acquire+0x1e0/0x338 (P) +# [ 515.935129] __mutex_lock+0xa8/0x4b8 +# [ 515.935144] mutex_lock_nested+0x24/0x30 +# [ 515.935159] _find_opp_table_unlocked+0x40/0xfc +# [ 515.935174] _find_key+0x64/0x16c +# [ 515.935184] dev_pm_opp_find_freq_exact+0x4c/0x74 +# [ 515.935197] qcom_cpufreq_hw_target_index+0xe8/0x128 +# [ 515.935211] __cpufreq_driver_target+0x144/0x29c +# [ 515.935227] sugov_work+0x58/0x74 +# [ 515.935239] kthread_worker_fn+0xf4/0x324 +# [ 515.935254] kthread+0x12c/0x208 +# [ 515.935266] ret_from_fork+0x10/0x20 +# [ 516.808569] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 516.820174] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 516.830968] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 517.828641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 517.840236] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 517.851032] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 518.820642] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 518.832237] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 518.843030] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 519.812636] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 519.824231] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 519.835026] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 520.808570] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 520.820165] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 520.830959] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 521.828643] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 521.840238] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 521.851033] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 522.820636] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 522.832232] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 522.843027] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 523.812639] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 523.824239] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 523.835034] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 524.804640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 524.816235] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 524.827026] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 525.828641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 525.840236] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 525.851031] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 526.820641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 526.832244] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 526.843041] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 527.812642] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 527.824242] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 527.835038] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 528.804639] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 528.816234] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 528.827027] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 529.832634] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 529.844231] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 529.855017] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 530.820646] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 530.832270] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 530.843065] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 531.812640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 531.824238] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 531.835030] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 532.804640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 532.816237] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 532.827031] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 533.828640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 533.840243] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 533.851037] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 534.820640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 534.832245] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 534.843038] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 535.812641] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 535.824238] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 535.835033] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 536.804639] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 536.816235] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 536.827030] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 537.828640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 537.840234] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 537.851020] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 538.820640] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 538.832235] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 538.843027] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 539.812644] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: hangcheck detected gpu lockup rb 0! +# [ 539.824247] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: completed fence: 45605 +# [ 539.835040] msm_dpu ae01000.display-controller: [drm:hangcheck_handler] *ERROR* 6.3.0.2: submitted fence: 45611 +# [ 540.124426] watchdog: BUG: soft lockup - CPU#0 stuck for 49s! [sugov:0:126] +# [ 540.124439] Modules linked in: +# [ 540.124448] irq event stamp: 9912389 +# [ 540.124453] hardirqs last enabled at (9912388): [] exit_to_kernel_mode+0x38/0x130 +# [ 540.124473] hardirqs last disabled at (9912389): [] el1_interrupt+0x24/0x64 +# [ 540.124486] softirqs last enabled at (9898068): [] handle_softirqs+0x4a0/0x4b8 +# [ 540.124505] softirqs last disabled at (9898071): [] __do_softirq+0x14/0x20 +# [ 540.124525] CPU: 0 UID: 0 PID: 126 Comm: sugov:0 Tainted: G W 6.14.0-rc4-gdddf15cff632 #1 +# [ 540.124540] Tainted: [W]=WARN +# [ 540.124544] Hardware name: Google Cheza (rev3+) (DT) +# [ 540.124549] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +# [ 540.124560] pc : xhci_urb_enqueue+0xbc/0x32c +# [ 540.124573] lr : xhci_urb_enqueue+0xb4/0x32c +# [ 540.124581] sp : ffff800080003c20 +# [ 540.124586] x29: ffff800080003c20 x28: 0000000000000000 x27: ffff225d00b1e6a0 +# [ 540.124602] x26: ffff225d01c3d800 x25: 0000000000000001 x24: 0000000000000006 +# [ 540.124617] x23: ffff225d044dc000 x22: ffff225d044dc000 x21: 0000000000000001 +# [ 540.124632] x20: ffff225d002d7280 x19: ffff225d0573a780 x18: ffff225e7eff0f50 +# [ 540.124647] x17: 000000000000cab0 x16: 0000000000000000 x15: ffff225d0353a000 +# [ 540.124661] x14: 0000000000000000 x13: 0000000000000820 x12: 0000000000000000 +# [ 540.124674] x11: ffff800080003a30 x10: 0000000000000001 x9 : 0000000000000000 +# [ 540.124689] x8 : ffff225d002d7300 x7 : 0000000000000000 x6 : 000000000000003f +# [ 540.124702] x5 : 00000000ffffffff x4 : 0000000000000920 x3 : 0000000000000080 +# [ 540.124716] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff225d002d7280 +# [ 540.124731] Call trace: +# [ 540.124736] xhci_urb_enqueue+0xbc/0x32c (P) +# [ 540.124751] usb_hcd_submit_urb+0x98/0x7fc +# [ 540.124766] usb_submit_urb+0x294/0x560 +# [ 540.124780] intr_callback+0x78/0x1fc +# [ 540.124798] __usb_hcd_giveback_urb+0x68/0x128 +# [ 540.124812] usb_giveback_urb_bh+0xa8/0x140 +# [ 540.124825] process_one_work+0x208/0x5e8 +# [ 540.124840] bh_worker+0x1a8/0x20c +# [ 540.124853] workqueue_softirq_action+0x78/0x88 +# [ 540.124868] tasklet_hi_action+0x14/0x3c +# [ 540.124883] handle_softirqs+0x118/0x4b8 +# [ 540.124897] __do_softirq+0x14/0x20 +# [ 540.124908] ____do_softirq+0x10/0x1c +# [ 540.124922] call_on_irq_stack+0x24/0x4c +# [ 540.124934] do_softirq_own_stack+0x1c/0x28 +# [ 540.124947] __irq_exit_rcu+0x174/0x1b4 +# [ 540.124961] irq_exit_rcu+0x10/0x38 +# [ 540.124976] el1_interrupt+0x38/0x64 +# [ 540.124987] el1h_64_irq_handler+0x18/0x24 +# [ 540.124998] el1h_64_irq+0x6c/0x70 +# [ 540.125009] lock_acquire+0x1e0/0x338 (P) +# [ 540.125023] __mutex_lock+0xa8/0x4b8 +# [ 540.125038] mutex_lock_nested+0x24/0x30 +# [ 540.125052] _find_opp_table_unlocked+0x40/0xfc +# [ 540.125067] _find_key+0x64/0x16c +# [ 540.125078] dev_pm_opp_find_freq_exact+0x4c/0x74 +# [ 540.125090] qcom_cpufreq_hw_target_index+0xe8/0x128 +# [ 540.125105] __cpufreq_driver_target+0x144/0x29c +# [ 540.125121] sugov_work+0x58/0x74 +# [ 540.125133] kthread_worker_fn+0xf4/0x324 +# [ 540.125148] kthread+0x12c/0x208 +# [ 540.125160] ret_from_fork+0x10/0x20 +# [ 540.125176] Kernel panic - not syncing: softlockup: hung tasks +# [ 540.423567] CPU: 0 UID: 0 PID: 126 Comm: sugov:0 Tainted: G W L 6.14.0-rc4-gdddf15cff632 #1 +# [ 540.433411] Tainted: [W]=WARN, [L]=SOFTLOCKUP +# [ 540.437901] Hardware name: Google Cheza (rev3+) (DT) +# [ 540.443022] Call trace: +# [ 540.445559] show_stack+0x18/0x24 (C) +# [ 540.449357] dump_stack_lvl+0x38/0xd0 +# [ 540.453157] dump_stack+0x18/0x24 +# [ 540.456599] panic+0x3bc/0x41c +# [ 540.459767] watchdog_timer_fn+0x254/0x2e4 +# [ 540.464005] __hrtimer_run_queues+0x3c4/0x440 +# [ 540.468508] hrtimer_interrupt+0xe4/0x244 +# [ 540.472662] arch_timer_handler_phys+0x2c/0x44 +# [ 540.477256] handle_percpu_devid_irq+0x90/0x1f0 +# [ 540.481943] handle_irq_desc+0x40/0x58 +# [ 540.485829] generic_handle_domain_irq+0x1c/0x28 +# [ 540.490604] gic_handle_irq+0x4c/0x11c +# [ 540.494483] do_interrupt_handler+0x50/0x84 +# [ 540.498811] el1_interrupt+0x34/0x64 +# [ 540.502518] el1h_64_irq_handler+0x18/0x24 +# [ 540.506758] el1h_64_irq+0x6c/0x70 +# [ 540.510279] xhci_urb_enqueue+0xbc/0x32c (P) +# [ 540.514693] usb_hcd_submit_urb+0x98/0x7fc +# [ 540.518932] usb_submit_urb+0x294/0x560 +# [ 540.522901] intr_callback+0x78/0x1fc +# [ 540.526700] __usb_hcd_giveback_urb+0x68/0x128 +# [ 540.531288] usb_giveback_urb_bh+0xa8/0x140 +# [ 540.535614] process_one_work+0x208/0x5e8 +# [ 540.539769] bh_worker+0x1a8/0x20c +# [ 540.543293] workqueue_softirq_action+0x78/0x88 +# [ 540.547980] tasklet_hi_action+0x14/0x3c +# [ 540.552038] handle_softirqs+0x118/0x4b8 +# [ 540.556096] __do_softirq+0x14/0x20 +# [ 540.559705] ____do_softirq+0x10/0x1c +# [ 540.563500] call_on_irq_stack+0x24/0x4c +# [ 540.567554] do_softirq_own_stack+0x1c/0x28 +# [ 540.571878] __irq_exit_rcu+0x174/0x1b4 +# [ 540.575849] irq_exit_rcu+0x10/0x38 +# [ 540.579462] el1_interrupt+0x38/0x64 +# [ 540.583158] el1h_64_irq_handler+0x18/0x24 +# [ 540.587397] el1h_64_irq+0x6c/0x70 +# [ 540.590918] lock_acquire+0x1e0/0x338 (P) +# [ 540.595060] __mutex_lock+0xa8/0x4b8 +# [ 540.598760] mutex_lock_nested+0x24/0x30 +# [ 540.602818] _find_opp_table_unlocked+0x40/0xfc +# [ 540.607503] _find_key+0x64/0x16c +# [ 540.610940] dev_pm_opp_find_freq_exact+0x4c/0x74 +# [ 540.615798] qcom_cpufreq_hw_target_index+0xe8/0x128 +# [ 540.620924] __cpufreq_driver_target+0x144/0x29c +# [ 540.625698] sugov_work+0x58/0x74 +# [ 540.629134] kthread_worker_fn+0xf4/0x324 +# [ 540.633278] kthread+0x12c/0x208 +# [ 540.636619] ret_from_fork+0x10/0x20 +# [ 540.640321] SMP: stopping secondary CPUs +# [ 540.644518] Kernel Offset: 0x417126200000 from 0xffff800080000000 +# [ 540.650848] PHYS_OFFSET: 0xfff0dda400000000 +# [ 540.655170] CPU features: 0x000,00000100,00901250,8200721b +# [ 540.660829] Memory Limit: none +# [ 540.663999] ---[ end Kernel panic - not syncing: softlockup: hung tasks ]--- diff --git a/drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt b/drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt index 329770c520d9..9450f2a002fd 100644 --- a/drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt +++ b/drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt @@ -10,6 +10,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt b/drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt index 3c7e494857b5..198deea3faa9 100644 --- a/drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt +++ b/drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt @@ -10,6 +10,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Panfrost is not a KMS driver, so skip the KMS tests kms_.* diff --git a/drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt b/drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt index 3c7e494857b5..198deea3faa9 100644 --- a/drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt +++ b/drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt @@ -10,6 +10,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Panfrost is not a KMS driver, so skip the KMS tests kms_.* diff --git a/drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt b/drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt index feeed89b6c3f..af99ac54c3a5 100644 --- a/drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt +++ b/drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt @@ -13,6 +13,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Panfrost is not a KMS driver, so skip the KMS tests kms_.* diff --git a/drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt b/drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt index feeed89b6c3f..af99ac54c3a5 100644 --- a/drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt +++ b/drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt @@ -13,6 +13,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Panfrost is not a KMS driver, so skip the KMS tests kms_.* diff --git a/drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt b/drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt index ba9160d4d8eb..61122ea7f008 100644 --- a/drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt +++ b/drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt @@ -5,6 +5,5 @@ core_setmaster_vs_auth,Crash dumb_buffer@create-clear,Crash fbdev@pan,Crash kms_cursor_legacy@basic-flip-before-cursor-legacy,Fail -kms_flip@flip-vs-modeset-vs-hang,Crash kms_prop_blob@invalid-set-prop,Crash kms_prop_blob@invalid-set-prop-any,Crash diff --git a/drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt b/drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt index eb16b29dee48..71418ea35a17 100644 --- a/drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt +++ b/drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt @@ -14,6 +14,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt index 2803d0d80192..45dd8d493f6e 100644 --- a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt +++ b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt @@ -2,7 +2,6 @@ dumb_buffer@create-clear,Crash kms_atomic_transition@modeset-transition,Fail kms_atomic_transition@modeset-transition-fencing,Fail kms_atomic_transition@plane-toggle-modeset-transition,Fail -kms_bw@connected-linear-tiling-1-displays-2160x1440p,Fail kms_color@gamma,Fail kms_color@legacy-gamma,Fail kms_cursor_crc@cursor-alpha-opaque,Fail @@ -55,6 +54,7 @@ kms_flip@plain-flip-ts-check,Fail kms_flip@plain-flip-ts-check-interruptible,Fail kms_flip@wf_vblank-ts-check-interruptible,Fail kms_invalid_mode@int-max-clock,Fail +kms_invalid_mode@overflow-vrefresh,Fail kms_lease@lease-uevent,Fail kms_lease@page-flip-implicit-plane,Fail kms_pipe_crc_basic@compare-crc-sanitycheck-nv12,Fail diff --git a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt index 348b4ce7eb4b..b467991d4094 100644 --- a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt @@ -75,58 +75,72 @@ kms_bw@linear-tiling-2-displays-2160x1440p # Linux Version: 6.11.0-rc5 kms_flip@flip-vs-expired-vblank -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/f944dd08-c88c-49ae-aff0-274374550a93@collabora.com/T/#u # Failure Rate: 40 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_bw@linear-tiling-1-displays-2160x1440p -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/afa2d3bf-29f2-488d-8cc9-f30d461444b0@collabora.com/T/#u # Failure Rate: 80 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_plane_multiple@tiling-none -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/6fdaa97f-c1a5-4216-831f-dbb7c5f90498@collabora.com/T/#u # Failure Rate: 100 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_bw@linear-tiling-1-displays-1920x1080p -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/616aa015-9574-4527-9d07-d8d698bbcc3c@collabora.com/T/#u # Failure Rate: 100 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_plane@plane-position-hole-dpms -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/7a1b888f-d7db-4ed7-96cd-3975ace837fb@collabora.com/T/#u # Failure Rate: 100 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_flip@flip-vs-absolute-wf_vblank -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/f17fffb6-abc4-464e-8465-395311b01f6a@collabora.com/T/#u # Failure Rate: 100 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_flip@flip-vs-blocking-wf-vblank -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/9b590b26-1bf9-4951-b6a3-ef6c67e6a1c6@collabora.com/T/#u # Failure Rate: 60 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_bw@linear-tiling-2-displays-1920x1080p -# Board Name: hp-11A-G6-EE-grunt +# Board Name: rk3399-gru-kevin # Bug Report: https://lore.kernel.org/dri-devel/059545fa-65b1-4f5c-a13e-4d2898679f51@collabora.com/T/#u # Failure Rate: 20 # IGT Version: 1.29-g33adea9eb # Linux Version: 6.13.0-rc2 kms_flip@modeset-vs-vblank-race-interruptible + +# Board Name: rk3399-gru-kevin +# Bug Report: https://lore.kernel.org/dri-devel/eece9a80-42f3-41f4-86cc-69d8a51b976a@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_bw@connected-linear-tiling-1-displays-2160x1440p + +# Board Name: rk3399-gru-kevin +# Bug Report: https://lore.kernel.org/dri-devel/63dfd5b7-8a54-44a3-9530-f8dcd77a21d1@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_bw@linear-tiling-1-displays-3840x2160p diff --git a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt index e8e994d92557..b83ec75161b2 100644 --- a/drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt +++ b/drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt @@ -14,6 +14,7 @@ nouveau_.* gem_.* i915_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt index c72fee70e739..9749ddb75121 100644 --- a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt +++ b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt @@ -157,6 +157,7 @@ kms_flip@plain-flip-ts-check-interruptible,Fail kms_flip@wf_vblank-ts-check,Fail kms_flip@wf_vblank-ts-check-interruptible,Fail kms_invalid_mode@int-max-clock,Fail +kms_invalid_mode@overflow-vrefresh,Fail kms_lease@cursor-implicit-plane,Fail kms_lease@lease-uevent,Fail kms_lease@page-flip-implicit-plane,Fail diff --git a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt index adbcdd0f28d2..28e37185bac0 100644 --- a/drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt +++ b/drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt @@ -19,6 +19,7 @@ gem_.* i915_.* xe_.* tools_test.* +kms_dp_link_training.* # Currently fails and causes coverage loss for other tests # since core_getversion also fails. diff --git a/drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt b/drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt index 62428f3c8f31..e3ca6da8cde7 100644 --- a/drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt +++ b/drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt @@ -88,3 +88,31 @@ kms_flip@flip-vs-expired-vblank # IGT Version: 1.28-gf13702b8e # Linux Version: 6.10.0-rc5 kms_pipe_crc_basic@nonblocking-crc-frame-sequence + +# Board Name: vkms +# Bug Report: https://lore.kernel.org/dri-devel/2364a6bf-e6bc-4741-8c78-cea8bdb06e03@collabora.com/T/#u +# Failure Rate: 20 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_flip@modeset-vs-vblank-race + +# Board Name: vkms +# Bug Report: https://lore.kernel.org/dri-devel/f7d72ed9-a783-46d7-b75d-54072bda32a3@collabora.com/T/#u +# Failure Rate: 100 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_pipe_crc_basic@suspend-read-crc + +# Board Name: vkms +# Bug Report: https://lore.kernel.org/dri-devel/98d3ba54-bcb9-41ab-adb1-a18ba61ee2e4@collabora.com/T/#u +# Failure Rate: 100 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_plane@plane-panning-bottom-right-suspend + +# Board Name: vkms +# Bug Report: https://lore.kernel.org/dri-devel/b58d15eb-094d-4ac2-aad3-83e518c2f55d@collabora.com/T/#u +# Failure Rate: 100 +# IGT Version: 1.30-g04bedb923 +# Linux Version: 6.14.0-rc4 +kms_vblank@ts-continuation-dpms-suspend diff --git a/drivers/gpu/drm/ci/xfails/vkms-none-skips.txt b/drivers/gpu/drm/ci/xfails/vkms-none-skips.txt index 319789806271..716d2d4e452d 100644 --- a/drivers/gpu/drm/ci/xfails/vkms-none-skips.txt +++ b/drivers/gpu/drm/ci/xfails/vkms-none-skips.txt @@ -1,5 +1,6 @@ # keeps printing vkms_vblank_simulate: vblank timer overrun and never ends kms_invalid_mode@int-max-clock +kms_invalid_mode@overflow-vrefresh # kernel panic seen with kms_cursor_crc tests kms_cursor_crc.* @@ -802,6 +803,7 @@ gem_.* i915_.* xe_.* tools_test.* +kms_dp_link_training.* # IGT issue. is_joiner_mode() should return false for non-Intel hardware. # https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/162 diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 30c736fc0067..7d2e499ea5de 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -98,6 +98,21 @@ struct drm_bridge_connector { * HDMI connector infrastructure, if any (see &DRM_BRIDGE_OP_HDMI). */ struct drm_bridge *bridge_hdmi; + /** + * @bridge_hdmi_audio: + * + * The bridge in the chain that implements necessary support for the + * HDMI Audio infrastructure, if any (see &DRM_BRIDGE_OP_HDMI_AUDIO). + */ + struct drm_bridge *bridge_hdmi_audio; + /** + * @bridge_dp_audio: + * + * The bridge in the chain that implements necessary support for the + * DisplayPort Audio infrastructure, if any (see + * &DRM_BRIDGE_OP_DP_AUDIO). + */ + struct drm_bridge *bridge_dp_audio; }; #define to_drm_bridge_connector(x) \ @@ -433,14 +448,25 @@ static int drm_bridge_connector_audio_startup(struct drm_connector *connector) to_drm_bridge_connector(connector); struct drm_bridge *bridge; - bridge = bridge_connector->bridge_hdmi; - if (!bridge) - return -EINVAL; + if (bridge_connector->bridge_hdmi_audio) { + bridge = bridge_connector->bridge_hdmi_audio; - if (!bridge->funcs->hdmi_audio_startup) - return 0; + if (!bridge->funcs->hdmi_audio_startup) + return 0; - return bridge->funcs->hdmi_audio_startup(connector, bridge); + return bridge->funcs->hdmi_audio_startup(connector, bridge); + } + + if (bridge_connector->bridge_dp_audio) { + bridge = bridge_connector->bridge_dp_audio; + + if (!bridge->funcs->dp_audio_startup) + return 0; + + return bridge->funcs->dp_audio_startup(connector, bridge); + } + + return -EINVAL; } static int drm_bridge_connector_audio_prepare(struct drm_connector *connector, @@ -451,11 +477,19 @@ static int drm_bridge_connector_audio_prepare(struct drm_connector *connector, to_drm_bridge_connector(connector); struct drm_bridge *bridge; - bridge = bridge_connector->bridge_hdmi; - if (!bridge) - return -EINVAL; + if (bridge_connector->bridge_hdmi_audio) { + bridge = bridge_connector->bridge_hdmi_audio; - return bridge->funcs->hdmi_audio_prepare(connector, bridge, fmt, hparms); + return bridge->funcs->hdmi_audio_prepare(connector, bridge, fmt, hparms); + } + + if (bridge_connector->bridge_dp_audio) { + bridge = bridge_connector->bridge_dp_audio; + + return bridge->funcs->dp_audio_prepare(connector, bridge, fmt, hparms); + } + + return -EINVAL; } static void drm_bridge_connector_audio_shutdown(struct drm_connector *connector) @@ -464,11 +498,15 @@ static void drm_bridge_connector_audio_shutdown(struct drm_connector *connector) to_drm_bridge_connector(connector); struct drm_bridge *bridge; - bridge = bridge_connector->bridge_hdmi; - if (!bridge) - return; + if (bridge_connector->bridge_hdmi_audio) { + bridge = bridge_connector->bridge_hdmi_audio; + bridge->funcs->hdmi_audio_shutdown(connector, bridge); + } - bridge->funcs->hdmi_audio_shutdown(connector, bridge); + if (bridge_connector->bridge_dp_audio) { + bridge = bridge_connector->bridge_dp_audio; + bridge->funcs->dp_audio_shutdown(connector, bridge); + } } static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connector, @@ -478,15 +516,27 @@ static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connecto to_drm_bridge_connector(connector); struct drm_bridge *bridge; - bridge = bridge_connector->bridge_hdmi; - if (!bridge) - return -EINVAL; + if (bridge_connector->bridge_hdmi_audio) { + bridge = bridge_connector->bridge_hdmi_audio; + + if (!bridge->funcs->hdmi_audio_mute_stream) + return -ENOTSUPP; - if (bridge->funcs->hdmi_audio_mute_stream) return bridge->funcs->hdmi_audio_mute_stream(connector, bridge, enable, direction); - else - return -ENOTSUPP; + } + + if (bridge_connector->bridge_dp_audio) { + bridge = bridge_connector->bridge_dp_audio; + + if (!bridge->funcs->dp_audio_mute_stream) + return -ENOTSUPP; + + return bridge->funcs->dp_audio_mute_stream(connector, bridge, + enable, direction); + } + + return -EINVAL; } static const struct drm_connector_hdmi_audio_funcs drm_bridge_connector_hdmi_audio_funcs = { @@ -576,6 +626,42 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, max_bpc = bridge->max_bpc; } + if (bridge->ops & DRM_BRIDGE_OP_HDMI_AUDIO) { + if (bridge_connector->bridge_hdmi_audio) + return ERR_PTR(-EBUSY); + + if (bridge_connector->bridge_dp_audio) + return ERR_PTR(-EBUSY); + + if (!bridge->hdmi_audio_max_i2s_playback_channels && + !bridge->hdmi_audio_spdif_playback) + return ERR_PTR(-EINVAL); + + if (!bridge->funcs->hdmi_audio_prepare || + !bridge->funcs->hdmi_audio_shutdown) + return ERR_PTR(-EINVAL); + + bridge_connector->bridge_hdmi_audio = bridge; + } + + if (bridge->ops & DRM_BRIDGE_OP_DP_AUDIO) { + if (bridge_connector->bridge_dp_audio) + return ERR_PTR(-EBUSY); + + if (bridge_connector->bridge_hdmi_audio) + return ERR_PTR(-EBUSY); + + if (!bridge->hdmi_audio_max_i2s_playback_channels && + !bridge->hdmi_audio_spdif_playback) + return ERR_PTR(-EINVAL); + + if (!bridge->funcs->dp_audio_prepare || + !bridge->funcs->dp_audio_shutdown) + return ERR_PTR(-EINVAL); + + bridge_connector->bridge_dp_audio = bridge; + } + if (!drm_bridge_get_next_bridge(bridge)) connector_type = bridge->type; @@ -611,22 +697,6 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, max_bpc); if (ret) return ERR_PTR(ret); - - if (bridge->hdmi_audio_max_i2s_playback_channels || - bridge->hdmi_audio_spdif_playback) { - if (!bridge->funcs->hdmi_audio_prepare || - !bridge->funcs->hdmi_audio_shutdown) - return ERR_PTR(-EINVAL); - - ret = drm_connector_hdmi_audio_init(connector, - bridge->hdmi_audio_dev, - &drm_bridge_connector_hdmi_audio_funcs, - bridge->hdmi_audio_max_i2s_playback_channels, - bridge->hdmi_audio_spdif_playback, - bridge->hdmi_audio_dai_port); - if (ret) - return ERR_PTR(ret); - } } else { ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, @@ -635,6 +705,24 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, return ERR_PTR(ret); } + if (bridge_connector->bridge_hdmi_audio || + bridge_connector->bridge_dp_audio) { + struct device *dev; + + if (bridge_connector->bridge_hdmi_audio) + dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev; + else + dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev; + + ret = drm_connector_hdmi_audio_init(connector, dev, + &drm_bridge_connector_hdmi_audio_funcs, + bridge->hdmi_audio_max_i2s_playback_channels, + bridge->hdmi_audio_spdif_playback, + bridge->hdmi_audio_dai_port); + if (ret) + return ERR_PTR(ret); + } + drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); if (bridge_connector->bridge_hpd) diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 56a4965e518c..ed31471bd0e2 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -96,7 +96,7 @@ static int drm_dp_cec_adap_enable(struct cec_adapter *adap, bool enable) u32 val = enable ? DP_CEC_TUNNELING_ENABLE : 0; ssize_t err = 0; - err = drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_CONTROL, val); + err = drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_CONTROL, val); return (enable && err < 0) ? err : 0; } @@ -112,7 +112,7 @@ static int drm_dp_cec_adap_log_addr(struct cec_adapter *adap, u8 addr) la_mask |= adap->log_addrs.log_addr_mask | (1 << addr); mask[0] = la_mask & 0xff; mask[1] = la_mask >> 8; - err = drm_dp_dpcd_write(aux, DP_CEC_LOGICAL_ADDRESS_MASK, mask, 2); + err = drm_dp_dpcd_write_data(aux, DP_CEC_LOGICAL_ADDRESS_MASK, mask, 2); return (addr != CEC_LOG_ADDR_INVALID && err < 0) ? err : 0; } @@ -123,15 +123,14 @@ static int drm_dp_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, unsigned int retries = min(5, attempts - 1); ssize_t err; - err = drm_dp_dpcd_write(aux, DP_CEC_TX_MESSAGE_BUFFER, - msg->msg, msg->len); + err = drm_dp_dpcd_write_data(aux, DP_CEC_TX_MESSAGE_BUFFER, + msg->msg, msg->len); if (err < 0) return err; - err = drm_dp_dpcd_writeb(aux, DP_CEC_TX_MESSAGE_INFO, - (msg->len - 1) | (retries << 4) | - DP_CEC_TX_MESSAGE_SEND); - return err < 0 ? err : 0; + return drm_dp_dpcd_write_byte(aux, DP_CEC_TX_MESSAGE_INFO, + (msg->len - 1) | (retries << 4) | + DP_CEC_TX_MESSAGE_SEND); } static int drm_dp_cec_adap_monitor_all_enable(struct cec_adapter *adap, @@ -144,13 +143,13 @@ static int drm_dp_cec_adap_monitor_all_enable(struct cec_adapter *adap, if (!(adap->capabilities & CEC_CAP_MONITOR_ALL)) return 0; - err = drm_dp_dpcd_readb(aux, DP_CEC_TUNNELING_CONTROL, &val); - if (err >= 0) { + err = drm_dp_dpcd_read_byte(aux, DP_CEC_TUNNELING_CONTROL, &val); + if (!err) { if (enable) val |= DP_CEC_SNOOPING_ENABLE; else val &= ~DP_CEC_SNOOPING_ENABLE; - err = drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_CONTROL, val); + err = drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_CONTROL, val); } return (enable && err < 0) ? err : 0; } @@ -194,7 +193,7 @@ static int drm_dp_cec_received(struct drm_dp_aux *aux) u8 rx_msg_info; ssize_t err; - err = drm_dp_dpcd_readb(aux, DP_CEC_RX_MESSAGE_INFO, &rx_msg_info); + err = drm_dp_dpcd_read_byte(aux, DP_CEC_RX_MESSAGE_INFO, &rx_msg_info); if (err < 0) return err; @@ -202,7 +201,7 @@ static int drm_dp_cec_received(struct drm_dp_aux *aux) return 0; msg.len = (rx_msg_info & DP_CEC_RX_MESSAGE_LEN_MASK) + 1; - err = drm_dp_dpcd_read(aux, DP_CEC_RX_MESSAGE_BUFFER, msg.msg, msg.len); + err = drm_dp_dpcd_read_data(aux, DP_CEC_RX_MESSAGE_BUFFER, msg.msg, msg.len); if (err < 0) return err; @@ -215,7 +214,7 @@ static void drm_dp_cec_handle_irq(struct drm_dp_aux *aux) struct cec_adapter *adap = aux->cec.adap; u8 flags; - if (drm_dp_dpcd_readb(aux, DP_CEC_TUNNELING_IRQ_FLAGS, &flags) < 0) + if (drm_dp_dpcd_read_byte(aux, DP_CEC_TUNNELING_IRQ_FLAGS, &flags) < 0) return; if (flags & DP_CEC_RX_MESSAGE_INFO_VALID) @@ -230,7 +229,7 @@ static void drm_dp_cec_handle_irq(struct drm_dp_aux *aux) (DP_CEC_TX_ADDRESS_NACK_ERROR | DP_CEC_TX_DATA_NACK_ERROR)) cec_transmit_attempt_done(adap, CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES); - drm_dp_dpcd_writeb(aux, DP_CEC_TUNNELING_IRQ_FLAGS, flags); + drm_dp_dpcd_write_byte(aux, DP_CEC_TUNNELING_IRQ_FLAGS, flags); } /** @@ -253,13 +252,13 @@ void drm_dp_cec_irq(struct drm_dp_aux *aux) if (!aux->cec.adap) goto unlock; - ret = drm_dp_dpcd_readb(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1, - &cec_irq); + ret = drm_dp_dpcd_read_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1, + &cec_irq); if (ret < 0 || !(cec_irq & DP_CEC_IRQ)) goto unlock; drm_dp_cec_handle_irq(aux); - drm_dp_dpcd_writeb(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1, DP_CEC_IRQ); + drm_dp_dpcd_write_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1, DP_CEC_IRQ); unlock: mutex_unlock(&aux->cec.lock); } @@ -269,7 +268,7 @@ static bool drm_dp_cec_cap(struct drm_dp_aux *aux, u8 *cec_cap) { u8 cap = 0; - if (drm_dp_dpcd_readb(aux, DP_CEC_TUNNELING_CAPABILITY, &cap) != 1 || + if (drm_dp_dpcd_read_byte(aux, DP_CEC_TUNNELING_CAPABILITY, &cap) < 0 || !(cap & DP_CEC_TUNNELING_CAPABLE)) return false; if (cec_cap) diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index dbce1c3f4969..f2a6559a2710 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -327,7 +327,7 @@ static int __read_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_CAP_SI if (offset < DP_RECEIVER_CAP_SIZE) { rd_interval = dpcd[offset]; } else { - if (drm_dp_dpcd_readb(aux, offset, &rd_interval) != 1) { + if (drm_dp_dpcd_read_byte(aux, offset, &rd_interval) < 0) { drm_dbg_kms(aux->drm_dev, "%s: failed rd interval read\n", aux->name); /* arbitrary default delay */ @@ -358,7 +358,7 @@ int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux) int unit; u8 val; - if (drm_dp_dpcd_readb(aux, DP_128B132B_TRAINING_AUX_RD_INTERVAL, &val) != 1) { + if (drm_dp_dpcd_read_byte(aux, DP_128B132B_TRAINING_AUX_RD_INTERVAL, &val) < 0) { drm_err(aux->drm_dev, "%s: failed rd interval read\n", aux->name); /* default to max */ @@ -704,6 +704,8 @@ EXPORT_SYMBOL(drm_dp_dpcd_set_powered); * function returns -EPROTO. Errors from the underlying AUX channel transfer * function, with the exception of -EBUSY (which causes the transaction to * be retried), are propagated to the caller. + * + * In most of the cases you want to use drm_dp_dpcd_read_data() instead. */ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, void *buffer, size_t size) @@ -752,6 +754,8 @@ EXPORT_SYMBOL(drm_dp_dpcd_read); * function returns -EPROTO. Errors from the underlying AUX channel transfer * function, with the exception of -EBUSY (which causes the transaction to * be retried), are propagated to the caller. + * + * In most of the cases you want to use drm_dp_dpcd_write_data() instead. */ ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset, void *buffer, size_t size) @@ -774,14 +778,13 @@ EXPORT_SYMBOL(drm_dp_dpcd_write); * @aux: DisplayPort AUX channel * @status: buffer to store the link status in (must be at least 6 bytes) * - * Returns the number of bytes transferred on success or a negative error - * code on failure. + * Returns a negative error code on failure or 0 on success. */ int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux, u8 status[DP_LINK_STATUS_SIZE]) { - return drm_dp_dpcd_read(aux, DP_LANE0_1_STATUS, status, - DP_LINK_STATUS_SIZE); + return drm_dp_dpcd_read_data(aux, DP_LANE0_1_STATUS, status, + DP_LINK_STATUS_SIZE); } EXPORT_SYMBOL(drm_dp_dpcd_read_link_status); @@ -804,30 +807,20 @@ int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux *aux, { int ret; - if (dp_phy == DP_PHY_DPRX) { - ret = drm_dp_dpcd_read(aux, - DP_LANE0_1_STATUS, - link_status, - DP_LINK_STATUS_SIZE); + if (dp_phy == DP_PHY_DPRX) + return drm_dp_dpcd_read_data(aux, + DP_LANE0_1_STATUS, + link_status, + DP_LINK_STATUS_SIZE); - if (ret < 0) - return ret; - - WARN_ON(ret != DP_LINK_STATUS_SIZE); - - return 0; - } - - ret = drm_dp_dpcd_read(aux, - DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy), - link_status, - DP_LINK_STATUS_SIZE - 1); + ret = drm_dp_dpcd_read_data(aux, + DP_LANE0_1_STATUS_PHY_REPEATER(dp_phy), + link_status, + DP_LINK_STATUS_SIZE - 1); if (ret < 0) return ret; - WARN_ON(ret != DP_LINK_STATUS_SIZE - 1); - /* Convert the LTTPR to the sink PHY link status layout */ memmove(&link_status[DP_SINK_STATUS - DP_LANE0_1_STATUS + 1], &link_status[DP_SINK_STATUS - DP_LANE0_1_STATUS], @@ -838,12 +831,81 @@ int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux *aux, } EXPORT_SYMBOL(drm_dp_dpcd_read_phy_link_status); +/** + * drm_dp_link_power_up() - power up a DisplayPort link + * @aux: DisplayPort AUX channel + * @revision: DPCD revision supported on the link + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_link_power_up(struct drm_dp_aux *aux, unsigned char revision) +{ + u8 value; + int err; + + /* DP_SET_POWER register is only available on DPCD v1.1 and later */ + if (revision < DP_DPCD_REV_11) + return 0; + + err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); + if (err < 0) + return err; + + value &= ~DP_SET_POWER_MASK; + value |= DP_SET_POWER_D0; + + err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); + if (err < 0) + return err; + + /* + * According to the DP 1.1 specification, a "Sink Device must exit the + * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink + * Control Field" (register 0x600). + */ + usleep_range(1000, 2000); + + return 0; +} +EXPORT_SYMBOL(drm_dp_link_power_up); + +/** + * drm_dp_link_power_down() - power down a DisplayPort link + * @aux: DisplayPort AUX channel + * @revision: DPCD revision supported on the link + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_link_power_down(struct drm_dp_aux *aux, unsigned char revision) +{ + u8 value; + int err; + + /* DP_SET_POWER register is only available on DPCD v1.1 and later */ + if (revision < DP_DPCD_REV_11) + return 0; + + err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value); + if (err < 0) + return err; + + value &= ~DP_SET_POWER_MASK; + value |= DP_SET_POWER_D3; + + err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value); + if (err < 0) + return err; + + return 0; +} +EXPORT_SYMBOL(drm_dp_link_power_down); + static int read_payload_update_status(struct drm_dp_aux *aux) { int ret; u8 status; - ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); + ret = drm_dp_dpcd_read_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); if (ret < 0) return ret; @@ -870,21 +932,21 @@ int drm_dp_dpcd_write_payload(struct drm_dp_aux *aux, int ret; int retries = 0; - drm_dp_dpcd_writeb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, - DP_PAYLOAD_TABLE_UPDATED); + drm_dp_dpcd_write_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, + DP_PAYLOAD_TABLE_UPDATED); payload_alloc[0] = vcpid; payload_alloc[1] = start_time_slot; payload_alloc[2] = time_slot_count; - ret = drm_dp_dpcd_write(aux, DP_PAYLOAD_ALLOCATE_SET, payload_alloc, 3); - if (ret != 3) { + ret = drm_dp_dpcd_write_data(aux, DP_PAYLOAD_ALLOCATE_SET, payload_alloc, 3); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "failed to write payload allocation %d\n", ret); goto fail; } retry: - ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); + ret = drm_dp_dpcd_read_byte(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); if (ret < 0) { drm_dbg_kms(aux->drm_dev, "failed to read payload table status %d\n", ret); goto fail; @@ -1040,15 +1102,15 @@ bool drm_dp_send_real_edid_checksum(struct drm_dp_aux *aux, { u8 link_edid_read = 0, auto_test_req = 0, test_resp = 0; - if (drm_dp_dpcd_read(aux, DP_DEVICE_SERVICE_IRQ_VECTOR, - &auto_test_req, 1) < 1) { + if (drm_dp_dpcd_read_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR, + &auto_test_req) < 0) { drm_err(aux->drm_dev, "%s: DPCD failed read at register 0x%x\n", aux->name, DP_DEVICE_SERVICE_IRQ_VECTOR); return false; } auto_test_req &= DP_AUTOMATED_TEST_REQUEST; - if (drm_dp_dpcd_read(aux, DP_TEST_REQUEST, &link_edid_read, 1) < 1) { + if (drm_dp_dpcd_read_byte(aux, DP_TEST_REQUEST, &link_edid_read) < 0) { drm_err(aux->drm_dev, "%s: DPCD failed read at register 0x%x\n", aux->name, DP_TEST_REQUEST); return false; @@ -1061,23 +1123,23 @@ bool drm_dp_send_real_edid_checksum(struct drm_dp_aux *aux, return false; } - if (drm_dp_dpcd_write(aux, DP_DEVICE_SERVICE_IRQ_VECTOR, - &auto_test_req, 1) < 1) { + if (drm_dp_dpcd_write_byte(aux, DP_DEVICE_SERVICE_IRQ_VECTOR, + auto_test_req) < 0) { drm_err(aux->drm_dev, "%s: DPCD failed write at register 0x%x\n", aux->name, DP_DEVICE_SERVICE_IRQ_VECTOR); return false; } /* send back checksum for the last edid extension block data */ - if (drm_dp_dpcd_write(aux, DP_TEST_EDID_CHECKSUM, - &real_edid_checksum, 1) < 1) { + if (drm_dp_dpcd_write_byte(aux, DP_TEST_EDID_CHECKSUM, + real_edid_checksum) < 0) { drm_err(aux->drm_dev, "%s: DPCD failed write at register 0x%x\n", aux->name, DP_TEST_EDID_CHECKSUM); return false; } test_resp |= DP_TEST_EDID_CHECKSUM_WRITE; - if (drm_dp_dpcd_write(aux, DP_TEST_RESPONSE, &test_resp, 1) < 1) { + if (drm_dp_dpcd_write_byte(aux, DP_TEST_RESPONSE, test_resp) < 0) { drm_err(aux->drm_dev, "%s: DPCD failed write at register 0x%x\n", aux->name, DP_TEST_RESPONSE); return false; @@ -1114,12 +1176,10 @@ static int drm_dp_read_extended_dpcd_caps(struct drm_dp_aux *aux, DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT)) return 0; - ret = drm_dp_dpcd_read(aux, DP_DP13_DPCD_REV, &dpcd_ext, - sizeof(dpcd_ext)); + ret = drm_dp_dpcd_read_data(aux, DP_DP13_DPCD_REV, &dpcd_ext, + sizeof(dpcd_ext)); if (ret < 0) return ret; - if (ret != sizeof(dpcd_ext)) - return -EIO; if (dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) { drm_dbg_kms(aux->drm_dev, @@ -1156,10 +1216,10 @@ int drm_dp_read_dpcd_caps(struct drm_dp_aux *aux, { int ret; - ret = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, DP_RECEIVER_CAP_SIZE); + ret = drm_dp_dpcd_read_data(aux, DP_DPCD_REV, dpcd, DP_RECEIVER_CAP_SIZE); if (ret < 0) return ret; - if (ret != DP_RECEIVER_CAP_SIZE || dpcd[DP_DPCD_REV] == 0) + if (dpcd[DP_DPCD_REV] == 0) return -EIO; ret = drm_dp_read_extended_dpcd_caps(aux, dpcd); @@ -1209,11 +1269,9 @@ int drm_dp_read_downstream_info(struct drm_dp_aux *aux, if (dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DETAILED_CAP_INFO_AVAILABLE) len *= 4; - ret = drm_dp_dpcd_read(aux, DP_DOWNSTREAM_PORT_0, downstream_ports, len); + ret = drm_dp_dpcd_read_data(aux, DP_DOWNSTREAM_PORT_0, downstream_ports, len); if (ret < 0) return ret; - if (ret != len) - return -EIO; drm_dbg_kms(aux->drm_dev, "%s: DPCD DFP: %*ph\n", aux->name, len, downstream_ports); @@ -1570,7 +1628,7 @@ EXPORT_SYMBOL(drm_dp_downstream_mode); */ int drm_dp_downstream_id(struct drm_dp_aux *aux, char id[6]) { - return drm_dp_dpcd_read(aux, DP_BRANCH_ID, id, 6); + return drm_dp_dpcd_read_data(aux, DP_BRANCH_ID, id, 6); } EXPORT_SYMBOL(drm_dp_downstream_id); @@ -1635,13 +1693,13 @@ void drm_dp_downstream_debug(struct seq_file *m, drm_dp_downstream_id(aux, id); seq_printf(m, "\t\tID: %s\n", id); - len = drm_dp_dpcd_read(aux, DP_BRANCH_HW_REV, &rev[0], 1); - if (len > 0) + len = drm_dp_dpcd_read_data(aux, DP_BRANCH_HW_REV, &rev[0], 1); + if (!len) seq_printf(m, "\t\tHW: %d.%d\n", (rev[0] & 0xf0) >> 4, rev[0] & 0xf); - len = drm_dp_dpcd_read(aux, DP_BRANCH_SW_REV, rev, 2); - if (len > 0) + len = drm_dp_dpcd_read_data(aux, DP_BRANCH_SW_REV, rev, 2); + if (!len) seq_printf(m, "\t\tSW: %d.%d\n", rev[0], rev[1]); if (detailed_cap_info) { @@ -1779,11 +1837,9 @@ int drm_dp_read_sink_count(struct drm_dp_aux *aux) u8 count; int ret; - ret = drm_dp_dpcd_readb(aux, DP_SINK_COUNT, &count); + ret = drm_dp_dpcd_read_byte(aux, DP_SINK_COUNT, &count); if (ret < 0) return ret; - if (ret != 1) - return -EIO; return DP_GET_SINK_COUNT(count); } @@ -2081,14 +2137,17 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, for (i = 0; i < num; i++) { msg.address = msgs[i].addr; - drm_dp_i2c_msg_set_request(&msg, &msgs[i]); - /* Send a bare address packet to start the transaction. - * Zero sized messages specify an address only (bare - * address) transaction. - */ - msg.buffer = NULL; - msg.size = 0; - err = drm_dp_i2c_do_msg(aux, &msg); + + if (!aux->no_zero_sized) { + drm_dp_i2c_msg_set_request(&msg, &msgs[i]); + /* Send a bare address packet to start the transaction. + * Zero sized messages specify an address only (bare + * address) transaction. + */ + msg.buffer = NULL; + msg.size = 0; + err = drm_dp_i2c_do_msg(aux, &msg); + } /* * Reset msg.request in case in case it got @@ -2107,6 +2166,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, msg.buffer = msgs[i].buf + j; msg.size = min(transfer_size, msgs[i].len - j); + if (j + msg.size == msgs[i].len && aux->no_zero_sized) + msg.request &= ~DP_AUX_I2C_MOT; err = drm_dp_i2c_drain_msg(aux, &msg); /* @@ -2124,15 +2185,17 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, } if (err >= 0) err = num; - /* Send a bare address packet to close out the transaction. - * Zero sized messages specify an address only (bare - * address) transaction. - */ - msg.request &= ~DP_AUX_I2C_MOT; - msg.buffer = NULL; - msg.size = 0; - (void)drm_dp_i2c_do_msg(aux, &msg); + if (!aux->no_zero_sized) { + /* Send a bare address packet to close out the transaction. + * Zero sized messages specify an address only (bare + * address) transaction. + */ + msg.request &= ~DP_AUX_I2C_MOT; + msg.buffer = NULL; + msg.size = 0; + (void)drm_dp_i2c_do_msg(aux, &msg); + } return err; } @@ -2172,13 +2235,13 @@ static int drm_dp_aux_get_crc(struct drm_dp_aux *aux, u8 *crc) u8 buf, count; int ret; - ret = drm_dp_dpcd_readb(aux, DP_TEST_SINK, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_TEST_SINK, &buf); if (ret < 0) return ret; WARN_ON(!(buf & DP_TEST_SINK_START)); - ret = drm_dp_dpcd_readb(aux, DP_TEST_SINK_MISC, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_TEST_SINK_MISC, &buf); if (ret < 0) return ret; @@ -2192,11 +2255,7 @@ static int drm_dp_aux_get_crc(struct drm_dp_aux *aux, u8 *crc) * At DP_TEST_CRC_R_CR, there's 6 bytes containing CRC data, 2 bytes * per component (RGB or CrYCb). */ - ret = drm_dp_dpcd_read(aux, DP_TEST_CRC_R_CR, crc, 6); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_read_data(aux, DP_TEST_CRC_R_CR, crc, 6); } static void drm_dp_aux_crc_work(struct work_struct *work) @@ -2395,11 +2454,11 @@ int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc) u8 buf; int ret; - ret = drm_dp_dpcd_readb(aux, DP_TEST_SINK, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_TEST_SINK, &buf); if (ret < 0) return ret; - ret = drm_dp_dpcd_writeb(aux, DP_TEST_SINK, buf | DP_TEST_SINK_START); + ret = drm_dp_dpcd_write_byte(aux, DP_TEST_SINK, buf | DP_TEST_SINK_START); if (ret < 0) return ret; @@ -2422,11 +2481,11 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux) u8 buf; int ret; - ret = drm_dp_dpcd_readb(aux, DP_TEST_SINK, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_TEST_SINK, &buf); if (ret < 0) return ret; - ret = drm_dp_dpcd_writeb(aux, DP_TEST_SINK, buf & ~DP_TEST_SINK_START); + ret = drm_dp_dpcd_write_byte(aux, DP_TEST_SINK, buf & ~DP_TEST_SINK_START); if (ret < 0) return ret; @@ -2512,11 +2571,7 @@ drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch) static int drm_dp_read_ident(struct drm_dp_aux *aux, unsigned int offset, struct drm_dp_dpcd_ident *ident) { - int ret; - - ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident)); - - return ret < 0 ? ret : 0; + return drm_dp_dpcd_read_data(aux, offset, ident, sizeof(*ident)); } static void drm_dp_dump_desc(struct drm_dp_aux *aux, @@ -2774,13 +2829,11 @@ static int drm_dp_read_lttpr_regs(struct drm_dp_aux *aux, int ret; for (offset = 0; offset < buf_size; offset += block_size) { - ret = drm_dp_dpcd_read(aux, - address + offset, - &buf[offset], block_size); + ret = drm_dp_dpcd_read_data(aux, + address + offset, + &buf[offset], block_size); if (ret < 0) return ret; - - WARN_ON(ret != block_size); } return 0; @@ -2995,12 +3048,12 @@ int drm_dp_get_phy_test_pattern(struct drm_dp_aux *aux, int err; u8 rate, lanes; - err = drm_dp_dpcd_readb(aux, DP_TEST_LINK_RATE, &rate); + err = drm_dp_dpcd_read_byte(aux, DP_TEST_LINK_RATE, &rate); if (err < 0) return err; data->link_rate = drm_dp_bw_code_to_link_rate(rate); - err = drm_dp_dpcd_readb(aux, DP_TEST_LANE_COUNT, &lanes); + err = drm_dp_dpcd_read_byte(aux, DP_TEST_LANE_COUNT, &lanes); if (err < 0) return err; data->num_lanes = lanes & DP_MAX_LANE_COUNT_MASK; @@ -3008,22 +3061,22 @@ int drm_dp_get_phy_test_pattern(struct drm_dp_aux *aux, if (lanes & DP_ENHANCED_FRAME_CAP) data->enhanced_frame_cap = true; - err = drm_dp_dpcd_readb(aux, DP_PHY_TEST_PATTERN, &data->phy_pattern); + err = drm_dp_dpcd_read_byte(aux, DP_PHY_TEST_PATTERN, &data->phy_pattern); if (err < 0) return err; switch (data->phy_pattern) { case DP_PHY_TEST_PATTERN_80BIT_CUSTOM: - err = drm_dp_dpcd_read(aux, DP_TEST_80BIT_CUSTOM_PATTERN_7_0, - &data->custom80, sizeof(data->custom80)); + err = drm_dp_dpcd_read_data(aux, DP_TEST_80BIT_CUSTOM_PATTERN_7_0, + &data->custom80, sizeof(data->custom80)); if (err < 0) return err; break; case DP_PHY_TEST_PATTERN_CP2520: - err = drm_dp_dpcd_read(aux, DP_TEST_HBR2_SCRAMBLER_RESET, - &data->hbr2_reset, - sizeof(data->hbr2_reset)); + err = drm_dp_dpcd_read_data(aux, DP_TEST_HBR2_SCRAMBLER_RESET, + &data->hbr2_reset, + sizeof(data->hbr2_reset)); if (err < 0) return err; } @@ -3050,15 +3103,15 @@ int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux, if (dp_rev < 0x12) { test_pattern = (test_pattern << 2) & DP_LINK_QUAL_PATTERN_11_MASK; - err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, - test_pattern); + err = drm_dp_dpcd_write_byte(aux, DP_TRAINING_PATTERN_SET, + test_pattern); if (err < 0) return err; } else { for (i = 0; i < data->num_lanes; i++) { - err = drm_dp_dpcd_writeb(aux, - DP_LINK_QUAL_LANE0_SET + i, - test_pattern); + err = drm_dp_dpcd_write_byte(aux, + DP_LINK_QUAL_LANE0_SET + i, + test_pattern); if (err < 0) return err; } @@ -3265,8 +3318,8 @@ bool drm_dp_as_sdp_supported(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_C if (dpcd[DP_DPCD_REV] < DP_DPCD_REV_13) return false; - if (drm_dp_dpcd_readb(aux, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1, - &rx_feature) != 1) { + if (drm_dp_dpcd_read_byte(aux, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1, + &rx_feature) < 0) { drm_dbg_dp(aux->drm_dev, "Failed to read DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1\n"); return false; @@ -3290,7 +3343,7 @@ bool drm_dp_vsc_sdp_supported(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIVER_ if (dpcd[DP_DPCD_REV] < DP_DPCD_REV_13) return false; - if (drm_dp_dpcd_readb(aux, DP_DPRX_FEATURE_ENUMERATION_LIST, &rx_feature) != 1) { + if (drm_dp_dpcd_read_byte(aux, DP_DPRX_FEATURE_ENUMERATION_LIST, &rx_feature) < 0) { drm_dbg_dp(aux->drm_dev, "failed to read DP_DPRX_FEATURE_ENUMERATION_LIST\n"); return false; } @@ -3421,16 +3474,13 @@ EXPORT_SYMBOL(drm_dp_get_pcon_max_frl_bw); */ int drm_dp_pcon_frl_prepare(struct drm_dp_aux *aux, bool enable_frl_ready_hpd) { - int ret; u8 buf = DP_PCON_ENABLE_SOURCE_CTL_MODE | DP_PCON_ENABLE_LINK_FRL_MODE; if (enable_frl_ready_hpd) buf |= DP_PCON_ENABLE_HPD_READY; - ret = drm_dp_dpcd_writeb(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); - - return ret; + return drm_dp_dpcd_write_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); } EXPORT_SYMBOL(drm_dp_pcon_frl_prepare); @@ -3445,7 +3495,7 @@ bool drm_dp_pcon_is_frl_ready(struct drm_dp_aux *aux) int ret; u8 buf; - ret = drm_dp_dpcd_readb(aux, DP_PCON_HDMI_TX_LINK_STATUS, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_TX_LINK_STATUS, &buf); if (ret < 0) return false; @@ -3474,7 +3524,7 @@ int drm_dp_pcon_frl_configure_1(struct drm_dp_aux *aux, int max_frl_gbps, int ret; u8 buf; - ret = drm_dp_dpcd_readb(aux, DP_PCON_HDMI_LINK_CONFIG_1, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, &buf); if (ret < 0) return ret; @@ -3509,11 +3559,7 @@ int drm_dp_pcon_frl_configure_1(struct drm_dp_aux *aux, int max_frl_gbps, return -EINVAL; } - ret = drm_dp_dpcd_writeb(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_write_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); } EXPORT_SYMBOL(drm_dp_pcon_frl_configure_1); @@ -3539,7 +3585,7 @@ int drm_dp_pcon_frl_configure_2(struct drm_dp_aux *aux, int max_frl_mask, else buf &= ~DP_PCON_FRL_LINK_TRAIN_EXTENDED; - ret = drm_dp_dpcd_writeb(aux, DP_PCON_HDMI_LINK_CONFIG_2, buf); + return drm_dp_dpcd_write_byte(aux, DP_PCON_HDMI_LINK_CONFIG_2, buf); if (ret < 0) return ret; @@ -3555,13 +3601,7 @@ EXPORT_SYMBOL(drm_dp_pcon_frl_configure_2); */ int drm_dp_pcon_reset_frl_config(struct drm_dp_aux *aux) { - int ret; - - ret = drm_dp_dpcd_writeb(aux, DP_PCON_HDMI_LINK_CONFIG_1, 0x0); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_write_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, 0x0); } EXPORT_SYMBOL(drm_dp_pcon_reset_frl_config); @@ -3576,7 +3616,7 @@ int drm_dp_pcon_frl_enable(struct drm_dp_aux *aux) int ret; u8 buf = 0; - ret = drm_dp_dpcd_readb(aux, DP_PCON_HDMI_LINK_CONFIG_1, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, &buf); if (ret < 0) return ret; if (!(buf & DP_PCON_ENABLE_SOURCE_CTL_MODE)) { @@ -3585,11 +3625,7 @@ int drm_dp_pcon_frl_enable(struct drm_dp_aux *aux) return -EINVAL; } buf |= DP_PCON_ENABLE_HDMI_LINK; - ret = drm_dp_dpcd_writeb(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_write_byte(aux, DP_PCON_HDMI_LINK_CONFIG_1, buf); } EXPORT_SYMBOL(drm_dp_pcon_frl_enable); @@ -3604,7 +3640,7 @@ bool drm_dp_pcon_hdmi_link_active(struct drm_dp_aux *aux) u8 buf; int ret; - ret = drm_dp_dpcd_readb(aux, DP_PCON_HDMI_TX_LINK_STATUS, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_TX_LINK_STATUS, &buf); if (ret < 0) return false; @@ -3629,7 +3665,7 @@ int drm_dp_pcon_hdmi_link_mode(struct drm_dp_aux *aux, u8 *frl_trained_mask) int mode; int ret; - ret = drm_dp_dpcd_readb(aux, DP_PCON_HDMI_POST_FRL_STATUS, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_POST_FRL_STATUS, &buf); if (ret < 0) return ret; @@ -3658,7 +3694,7 @@ void drm_dp_pcon_hdmi_frl_link_error_count(struct drm_dp_aux *aux, struct drm_hdmi_info *hdmi = &connector->display_info.hdmi; for (i = 0; i < hdmi->max_lanes; i++) { - if (drm_dp_dpcd_readb(aux, DP_PCON_HDMI_ERROR_STATUS_LN0 + i, &buf) < 0) + if (drm_dp_dpcd_read_byte(aux, DP_PCON_HDMI_ERROR_STATUS_LN0 + i, &buf) < 0) return; error_count = buf & DP_PCON_HDMI_ERROR_COUNT_MASK; @@ -3793,7 +3829,7 @@ int drm_dp_pcon_configure_dsc_enc(struct drm_dp_aux *aux, u8 pps_buf_config) u8 buf; int ret; - ret = drm_dp_dpcd_readb(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, &buf); if (ret < 0) return ret; @@ -3804,11 +3840,7 @@ int drm_dp_pcon_configure_dsc_enc(struct drm_dp_aux *aux, u8 pps_buf_config) buf |= pps_buf_config << 2; } - ret = drm_dp_dpcd_writeb(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, buf); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_write_byte(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, buf); } /** @@ -3820,13 +3852,7 @@ int drm_dp_pcon_configure_dsc_enc(struct drm_dp_aux *aux, u8 pps_buf_config) */ int drm_dp_pcon_pps_default(struct drm_dp_aux *aux) { - int ret; - - ret = drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_DISABLED); - if (ret < 0) - return ret; - - return 0; + return drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_DISABLED); } EXPORT_SYMBOL(drm_dp_pcon_pps_default); @@ -3842,15 +3868,11 @@ int drm_dp_pcon_pps_override_buf(struct drm_dp_aux *aux, u8 pps_buf[128]) { int ret; - ret = drm_dp_dpcd_write(aux, DP_PCON_HDMI_PPS_OVERRIDE_BASE, &pps_buf, 128); + ret = drm_dp_dpcd_write_data(aux, DP_PCON_HDMI_PPS_OVERRIDE_BASE, &pps_buf, 128); if (ret < 0) return ret; - ret = drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_EN_BUFFER); - if (ret < 0) - return ret; - - return 0; + return drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_EN_BUFFER); } EXPORT_SYMBOL(drm_dp_pcon_pps_override_buf); @@ -3867,21 +3889,17 @@ int drm_dp_pcon_pps_override_param(struct drm_dp_aux *aux, u8 pps_param[6]) { int ret; - ret = drm_dp_dpcd_write(aux, DP_PCON_HDMI_PPS_OVRD_SLICE_HEIGHT, &pps_param[0], 2); + ret = drm_dp_dpcd_write_data(aux, DP_PCON_HDMI_PPS_OVRD_SLICE_HEIGHT, &pps_param[0], 2); if (ret < 0) return ret; - ret = drm_dp_dpcd_write(aux, DP_PCON_HDMI_PPS_OVRD_SLICE_WIDTH, &pps_param[2], 2); + ret = drm_dp_dpcd_write_data(aux, DP_PCON_HDMI_PPS_OVRD_SLICE_WIDTH, &pps_param[2], 2); if (ret < 0) return ret; - ret = drm_dp_dpcd_write(aux, DP_PCON_HDMI_PPS_OVRD_BPP, &pps_param[4], 2); + ret = drm_dp_dpcd_write_data(aux, DP_PCON_HDMI_PPS_OVRD_BPP, &pps_param[4], 2); if (ret < 0) return ret; - ret = drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_EN_BUFFER); - if (ret < 0) - return ret; - - return 0; + return drm_dp_pcon_configure_dsc_enc(aux, DP_PCON_ENC_PPS_OVERRIDE_EN_BUFFER); } EXPORT_SYMBOL(drm_dp_pcon_pps_override_param); @@ -3897,7 +3915,7 @@ int drm_dp_pcon_convert_rgb_to_ycbcr(struct drm_dp_aux *aux, u8 color_spc) int ret; u8 buf; - ret = drm_dp_dpcd_readb(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, &buf); + ret = drm_dp_dpcd_read_byte(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, &buf); if (ret < 0) return ret; @@ -3906,11 +3924,7 @@ int drm_dp_pcon_convert_rgb_to_ycbcr(struct drm_dp_aux *aux, u8 color_spc) else buf &= ~DP_CONVERSION_RGB_YCBCR_MASK; - ret = drm_dp_dpcd_writeb(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, buf); - if (ret < 0) - return ret; - - return 0; + return drm_dp_dpcd_write_byte(aux, DP_PROTOCOL_CONVERTER_CONTROL_2, buf); } EXPORT_SYMBOL(drm_dp_pcon_convert_rgb_to_ycbcr); @@ -3942,12 +3956,12 @@ int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_bac buf[0] = level; } - ret = drm_dp_dpcd_write(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, sizeof(buf)); - if (ret != sizeof(buf)) { + ret = drm_dp_dpcd_write_data(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, sizeof(buf)); + if (ret < 0) { drm_err(aux->drm_dev, "%s: Failed to write aux backlight level: %d\n", aux->name, ret); - return ret < 0 ? ret : -EIO; + return ret; } return 0; @@ -3965,22 +3979,22 @@ drm_edp_backlight_set_enable(struct drm_dp_aux *aux, const struct drm_edp_backli if (!bl->aux_enable) return 0; - ret = drm_dp_dpcd_readb(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, &buf); - if (ret != 1) { + ret = drm_dp_dpcd_read_byte(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, &buf); + if (ret < 0) { drm_err(aux->drm_dev, "%s: Failed to read eDP display control register: %d\n", aux->name, ret); - return ret < 0 ? ret : -EIO; + return ret; } if (enable) buf |= DP_EDP_BACKLIGHT_ENABLE; else buf &= ~DP_EDP_BACKLIGHT_ENABLE; - ret = drm_dp_dpcd_writeb(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, buf); - if (ret != 1) { + ret = drm_dp_dpcd_write_byte(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, buf); + if (ret < 0) { drm_err(aux->drm_dev, "%s: Failed to write eDP display control register: %d\n", aux->name, ret); - return ret < 0 ? ret : -EIO; + return ret; } return 0; @@ -4016,15 +4030,16 @@ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backli dpcd_buf = DP_EDP_BACKLIGHT_CONTROL_MODE_PWM; if (bl->pwmgen_bit_count) { - ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count); - if (ret != 1) + ret = drm_dp_dpcd_write_byte(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count); + if (ret < 0) drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n", aux->name, ret); } if (bl->pwm_freq_pre_divider) { - ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_FREQ_SET, bl->pwm_freq_pre_divider); - if (ret != 1) + ret = drm_dp_dpcd_write_byte(aux, DP_EDP_BACKLIGHT_FREQ_SET, + bl->pwm_freq_pre_divider); + if (ret < 0) drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux backlight frequency: %d\n", aux->name, ret); @@ -4032,8 +4047,8 @@ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backli dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE; } - ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf); - if (ret != 1) { + ret = drm_dp_dpcd_write_byte(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux backlight mode: %d\n", aux->name, ret); return ret < 0 ? ret : -EIO; @@ -4088,8 +4103,8 @@ drm_edp_backlight_probe_max(struct drm_dp_aux *aux, struct drm_edp_backlight_inf if (!bl->aux_set) return 0; - ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT, &pn); - if (ret != 1) { + ret = drm_dp_dpcd_read_byte(aux, DP_EDP_PWMGEN_BIT_COUNT, &pn); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap: %d\n", aux->name, ret); return -ENODEV; @@ -4122,14 +4137,14 @@ drm_edp_backlight_probe_max(struct drm_dp_aux *aux, struct drm_edp_backlight_inf * - FxP is within 25% of desired value. * Note: 25% is arbitrary value and may need some tweak. */ - ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min); - if (ret != 1) { + ret = drm_dp_dpcd_read_byte(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap min: %d\n", aux->name, ret); return 0; } - ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max); - if (ret != 1) { + ret = drm_dp_dpcd_read_byte(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap max: %d\n", aux->name, ret); return 0; @@ -4154,8 +4169,8 @@ drm_edp_backlight_probe_max(struct drm_dp_aux *aux, struct drm_edp_backlight_inf break; } - ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, pn); - if (ret != 1) { + ret = drm_dp_dpcd_write_byte(aux, DP_EDP_PWMGEN_BIT_COUNT, pn); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n", aux->name, ret); return 0; @@ -4180,8 +4195,8 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i u8 buf[2]; u8 mode_reg; - ret = drm_dp_dpcd_readb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &mode_reg); - if (ret != 1) { + ret = drm_dp_dpcd_read_byte(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &mode_reg); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to read backlight mode: %d\n", aux->name, ret); return ret < 0 ? ret : -EIO; @@ -4194,11 +4209,11 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i if (*current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { int size = 1 + bl->lsb_reg_used; - ret = drm_dp_dpcd_read(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, size); - if (ret != size) { + ret = drm_dp_dpcd_read_data(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, size); + if (ret < 0) { drm_dbg_kms(aux->drm_dev, "%s: Failed to read backlight level: %d\n", aux->name, ret); - return ret < 0 ? ret : -EIO; + return ret; } if (bl->lsb_reg_used) @@ -4343,8 +4358,8 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) if (!panel || !panel->dev || !aux) return -EINVAL; - ret = drm_dp_dpcd_read(aux, DP_EDP_DPCD_REV, edp_dpcd, - EDP_DISPLAY_CTL_CAP_SIZE); + ret = drm_dp_dpcd_read_data(aux, DP_EDP_DPCD_REV, edp_dpcd, + EDP_DISPLAY_CTL_CAP_SIZE); if (ret < 0) return ret; @@ -4385,8 +4400,9 @@ EXPORT_SYMBOL(drm_panel_dp_aux_backlight); #endif /* See DP Standard v2.1 2.6.4.4.1.1, 2.8.4.4, 2.8.7 */ -static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, - int symbol_size, bool is_mst) +static int drm_dp_link_data_symbol_cycles(int lane_count, int pixels, + int bpp_x16, int symbol_size, + bool is_mst) { int cycles = DIV_ROUND_UP(pixels * bpp_x16, 16 * symbol_size * lane_count); int align = is_mst ? 4 / lane_count : 1; @@ -4394,22 +4410,42 @@ static int drm_dp_link_symbol_cycles(int lane_count, int pixels, int bpp_x16, return ALIGN(cycles, align); } -static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_count, - int bpp_x16, int symbol_size, bool is_mst) +/** + * drm_dp_link_symbol_cycles - calculate the link symbol count with/without dsc + * @lane_count: DP link lane count + * @pixels: number of pixels in a scanline + * @dsc_slice_count: number of slices for DSC or '0' for non-DSC + * @bpp_x16: bits per pixel in .4 binary fixed format + * @symbol_size: DP symbol size + * @is_mst: %true for MST and %false for SST + * + * Calculate the link symbol cycles for both DSC (@dsc_slice_count !=0) and + * non-DSC case (@dsc_slice_count == 0) and return the count. + */ +int drm_dp_link_symbol_cycles(int lane_count, int pixels, int dsc_slice_count, + int bpp_x16, int symbol_size, bool is_mst) { + int slice_count = dsc_slice_count ? : 1; int slice_pixels = DIV_ROUND_UP(pixels, slice_count); - int slice_data_cycles = drm_dp_link_symbol_cycles(lane_count, slice_pixels, - bpp_x16, symbol_size, is_mst); - int slice_eoc_cycles = is_mst ? 4 / lane_count : 1; + int slice_data_cycles = drm_dp_link_data_symbol_cycles(lane_count, + slice_pixels, + bpp_x16, + symbol_size, + is_mst); + int slice_eoc_cycles = 0; + + if (dsc_slice_count) + slice_eoc_cycles = is_mst ? 4 / lane_count : 1; return slice_count * (slice_data_cycles + slice_eoc_cycles); } +EXPORT_SYMBOL(drm_dp_link_symbol_cycles); /** * drm_dp_bw_overhead - Calculate the BW overhead of a DP link stream * @lane_count: DP link lane count * @hactive: pixel count of the active period in one scanline of the stream - * @dsc_slice_count: DSC slice count if @flags/DRM_DP_LINK_BW_OVERHEAD_DSC is set + * @dsc_slice_count: number of slices for DSC or '0' for non-DSC * @bpp_x16: bits per pixel in .4 binary fixed point * @flags: DRM_DP_OVERHEAD_x flags * @@ -4423,7 +4459,7 @@ static int drm_dp_link_dsc_symbol_cycles(int lane_count, int pixels, int slice_c * as well as the stream's * - @hactive timing * - @bpp_x16 color depth - * - compression mode (@flags / %DRM_DP_OVERHEAD_DSC). + * - compression mode (@dsc_slice_count != 0) * Note that this overhead doesn't account for the 8b/10b, 128b/132b * channel coding efficiency, for that see * @drm_dp_link_bw_channel_coding_efficiency(). @@ -4478,15 +4514,10 @@ int drm_dp_bw_overhead(int lane_count, int hactive, WARN_ON((flags & DRM_DP_BW_OVERHEAD_UHBR) && (flags & DRM_DP_BW_OVERHEAD_FEC)); - if (flags & DRM_DP_BW_OVERHEAD_DSC) - symbol_cycles = drm_dp_link_dsc_symbol_cycles(lane_count, hactive, - dsc_slice_count, - bpp_x16, symbol_size, - is_mst); - else - symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, - bpp_x16, symbol_size, - is_mst); + symbol_cycles = drm_dp_link_symbol_cycles(lane_count, hactive, + dsc_slice_count, + bpp_x16, symbol_size, + is_mst); return DIV_ROUND_UP_ULL(mul_u32_u32(symbol_cycles * symbol_size * lane_count, overhead * 16), diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 3a1f1ffc7b55..a89f38fd3218 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -2192,24 +2192,20 @@ static int drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, guid_t *guid) guid_copy(&mstb->guid, guid); if (!drm_dp_validate_guid(mstb->mgr, &mstb->guid)) { + struct drm_dp_aux *aux; u8 buf[UUID_SIZE]; export_guid(buf, &mstb->guid); - if (mstb->port_parent) { - ret = drm_dp_send_dpcd_write(mstb->mgr, - mstb->port_parent, - DP_GUID, sizeof(buf), buf); - } else { - ret = drm_dp_dpcd_write(mstb->mgr->aux, - DP_GUID, buf, sizeof(buf)); - } + if (mstb->port_parent) + aux = &mstb->port_parent->aux; + else + aux = mstb->mgr->aux; + + ret = drm_dp_dpcd_write_data(aux, DP_GUID, buf, sizeof(buf)); } - if (ret < 16 && ret > 0) - return -EPROTO; - - return ret == 16 ? 0 : ret; + return ret; } static void build_mst_prop_path(const struct drm_dp_mst_branch *mstb, @@ -2744,14 +2740,13 @@ retry: do { tosend = min3(mgr->max_dpcd_transaction_bytes, 16, total); - ret = drm_dp_dpcd_write(mgr->aux, regbase + offset, - &msg[offset], - tosend); - if (ret != tosend) { - if (ret == -EIO && retries < 5) { - retries++; - goto retry; - } + ret = drm_dp_dpcd_write_data(mgr->aux, regbase + offset, + &msg[offset], + tosend); + if (ret == -EIO && retries < 5) { + retries++; + goto retry; + } else if (ret < 0) { drm_dbg_kms(mgr->dev, "failed to dpcd write %d %d\n", tosend, ret); return -EIO; @@ -3624,7 +3619,7 @@ enum drm_dp_mst_mode drm_dp_read_mst_cap(struct drm_dp_aux *aux, if (dpcd[DP_DPCD_REV] < DP_DPCD_REV_12) return DRM_DP_SST; - if (drm_dp_dpcd_readb(aux, DP_MSTM_CAP, &mstm_cap) != 1) + if (drm_dp_dpcd_read_byte(aux, DP_MSTM_CAP, &mstm_cap) < 0) return DRM_DP_SST; if (mstm_cap & DP_MST_CAP) @@ -3679,10 +3674,10 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms mgr->mst_primary = mstb; drm_dp_mst_topology_get_mstb(mgr->mst_primary); - ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, - DP_MST_EN | - DP_UP_REQ_EN | - DP_UPSTREAM_IS_SRC); + ret = drm_dp_dpcd_write_byte(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | + DP_UP_REQ_EN | + DP_UPSTREAM_IS_SRC); if (ret < 0) goto out_unlock; @@ -3697,7 +3692,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms mstb = mgr->mst_primary; mgr->mst_primary = NULL; /* this can fail if the device is gone */ - drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); + drm_dp_dpcd_write_byte(mgr->aux, DP_MSTM_CTRL, 0); ret = 0; mgr->payload_id_table_cleared = false; @@ -3763,8 +3758,8 @@ EXPORT_SYMBOL(drm_dp_mst_topology_queue_probe); void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr) { mutex_lock(&mgr->lock); - drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, - DP_MST_EN | DP_UPSTREAM_IS_SRC); + drm_dp_dpcd_write_byte(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | DP_UPSTREAM_IS_SRC); mutex_unlock(&mgr->lock); flush_work(&mgr->up_req_work); flush_work(&mgr->work); @@ -3813,18 +3808,18 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, goto out_fail; } - ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, - DP_MST_EN | - DP_UP_REQ_EN | - DP_UPSTREAM_IS_SRC); + ret = drm_dp_dpcd_write_byte(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | + DP_UP_REQ_EN | + DP_UPSTREAM_IS_SRC); if (ret < 0) { drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n"); goto out_fail; } /* Some hubs forget their guids after they resume */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf)); - if (ret != sizeof(buf)) { + ret = drm_dp_dpcd_read_data(mgr->aux, DP_GUID, buf, sizeof(buf)); + if (ret < 0) { drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); goto out_fail; } @@ -3883,8 +3878,8 @@ drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up, *mstb = NULL; len = min(mgr->max_dpcd_transaction_bytes, 16); - ret = drm_dp_dpcd_read(mgr->aux, basereg, replyblock, len); - if (ret != len) { + ret = drm_dp_dpcd_read_data(mgr->aux, basereg, replyblock, len); + if (ret < 0) { drm_dbg_kms(mgr->dev, "failed to read DPCD down rep %d %d\n", len, ret); return false; } @@ -3922,9 +3917,9 @@ drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up, curreply = len; while (replylen > 0) { len = min3(replylen, mgr->max_dpcd_transaction_bytes, 16); - ret = drm_dp_dpcd_read(mgr->aux, basereg + curreply, - replyblock, len); - if (ret != len) { + ret = drm_dp_dpcd_read_data(mgr->aux, basereg + curreply, + replyblock, len); + if (ret < 0) { drm_dbg_kms(mgr->dev, "failed to read a chunk (len %d, ret %d)\n", len, ret); return false; @@ -4881,9 +4876,9 @@ static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, int i; for (i = 0; i < DP_PAYLOAD_TABLE_SIZE; i += 16) { - if (drm_dp_dpcd_read(mgr->aux, - DP_PAYLOAD_TABLE_UPDATE_STATUS + i, - &buf[i], 16) != 16) + if (drm_dp_dpcd_read_data(mgr->aux, + DP_PAYLOAD_TABLE_UPDATE_STATUS + i, + &buf[i], 16) < 0) return false; } return true; @@ -4972,23 +4967,24 @@ void drm_dp_mst_dump_topology(struct seq_file *m, } seq_printf(m, "dpcd: %*ph\n", DP_RECEIVER_CAP_SIZE, buf); - ret = drm_dp_dpcd_read(mgr->aux, DP_FAUX_CAP, buf, 2); - if (ret != 2) { + ret = drm_dp_dpcd_read_data(mgr->aux, DP_FAUX_CAP, buf, 2); + if (ret < 0) { seq_printf(m, "faux/mst read failed\n"); goto out; } seq_printf(m, "faux/mst: %*ph\n", 2, buf); - ret = drm_dp_dpcd_read(mgr->aux, DP_MSTM_CTRL, buf, 1); - if (ret != 1) { + ret = drm_dp_dpcd_read_data(mgr->aux, DP_MSTM_CTRL, buf, 1); + if (ret < 0) { seq_printf(m, "mst ctrl read failed\n"); goto out; } seq_printf(m, "mst ctrl: %*ph\n", 1, buf); /* dump the standard OUI branch header */ - ret = drm_dp_dpcd_read(mgr->aux, DP_BRANCH_OUI, buf, DP_BRANCH_OUI_HEADER_SIZE); - if (ret != DP_BRANCH_OUI_HEADER_SIZE) { + ret = drm_dp_dpcd_read_data(mgr->aux, DP_BRANCH_OUI, buf, + DP_BRANCH_OUI_HEADER_SIZE); + if (ret < 0) { seq_printf(m, "branch oui read failed\n"); goto out; } @@ -6112,14 +6108,14 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) /* DP-to-DP peer device */ if (drm_dp_mst_is_virtual_dpcd(immediate_upstream_port)) { - if (drm_dp_dpcd_read(&port->aux, - DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1) + if (drm_dp_dpcd_read_data(&port->aux, + DP_DSC_SUPPORT, &endpoint_dsc, 1) < 0) return NULL; - if (drm_dp_dpcd_read(&port->aux, - DP_FEC_CAPABILITY, &endpoint_fec, 1) != 1) + if (drm_dp_dpcd_read_data(&port->aux, + DP_FEC_CAPABILITY, &endpoint_fec, 1) < 0) return NULL; - if (drm_dp_dpcd_read(&immediate_upstream_port->aux, - DP_DSC_SUPPORT, &upstream_dsc, 1) != 1) + if (drm_dp_dpcd_read_data(&immediate_upstream_port->aux, + DP_DSC_SUPPORT, &upstream_dsc, 1) < 0) return NULL; /* Enpoint decompression with DP-to-DP peer device */ @@ -6157,8 +6153,8 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD)) { u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; - if (drm_dp_dpcd_read(immediate_upstream_aux, - DP_DSC_SUPPORT, &upstream_dsc, 1) != 1) + if (drm_dp_dpcd_read_data(immediate_upstream_aux, + DP_DSC_SUPPORT, &upstream_dsc, 1) < 0) return NULL; if (!(upstream_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED)) @@ -6180,11 +6176,11 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) * therefore the endpoint needs to be * both DSC and FEC capable. */ - if (drm_dp_dpcd_read(&port->aux, - DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1) + if (drm_dp_dpcd_read_data(&port->aux, + DP_DSC_SUPPORT, &endpoint_dsc, 1) < 0) return NULL; - if (drm_dp_dpcd_read(&port->aux, - DP_FEC_CAPABILITY, &endpoint_fec, 1) != 1) + if (drm_dp_dpcd_read_data(&port->aux, + DP_FEC_CAPABILITY, &endpoint_fec, 1) < 0) return NULL; if ((endpoint_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED) && (endpoint_fec & DP_FEC_CAPABLE)) diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 90fe07a89260..076edf161048 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -222,7 +222,7 @@ static int read_tunnel_regs(struct drm_dp_aux *aux, struct drm_dp_tunnel_regs *r while ((len = next_reg_area(&offset))) { int address = DP_TUNNELING_BASE + offset; - if (drm_dp_dpcd_read(aux, address, tunnel_reg_ptr(regs, address), len) < 0) + if (drm_dp_dpcd_read_data(aux, address, tunnel_reg_ptr(regs, address), len) < 0) return -EIO; offset += len; @@ -913,7 +913,7 @@ static int set_bw_alloc_mode(struct drm_dp_tunnel *tunnel, bool enable) u8 mask = DP_DISPLAY_DRIVER_BW_ALLOCATION_MODE_ENABLE | DP_UNMASK_BW_ALLOCATION_IRQ; u8 val; - if (drm_dp_dpcd_readb(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, &val) < 0) + if (drm_dp_dpcd_read_byte(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, &val) < 0) goto out_err; if (enable) @@ -921,7 +921,7 @@ static int set_bw_alloc_mode(struct drm_dp_tunnel *tunnel, bool enable) else val &= ~mask; - if (drm_dp_dpcd_writeb(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, val) < 0) + if (drm_dp_dpcd_write_byte(tunnel->aux, DP_DPTX_BW_ALLOCATION_MODE_CONTROL, val) < 0) goto out_err; tunnel->bw_alloc_enabled = enable; @@ -1039,7 +1039,7 @@ static int clear_bw_req_state(struct drm_dp_aux *aux) { u8 bw_req_mask = DP_BW_REQUEST_SUCCEEDED | DP_BW_REQUEST_FAILED; - if (drm_dp_dpcd_writeb(aux, DP_TUNNELING_STATUS, bw_req_mask) < 0) + if (drm_dp_dpcd_write_byte(aux, DP_TUNNELING_STATUS, bw_req_mask) < 0) return -EIO; return 0; @@ -1052,7 +1052,7 @@ static int bw_req_complete(struct drm_dp_aux *aux, bool *status_changed) u8 val; int err; - if (drm_dp_dpcd_readb(aux, DP_TUNNELING_STATUS, &val) < 0) + if (drm_dp_dpcd_read_byte(aux, DP_TUNNELING_STATUS, &val) < 0) return -EIO; *status_changed = val & status_change_mask; @@ -1095,7 +1095,7 @@ static int allocate_tunnel_bw(struct drm_dp_tunnel *tunnel, int bw) if (err) goto out; - if (drm_dp_dpcd_writeb(tunnel->aux, DP_REQUEST_BW, request_bw) < 0) { + if (drm_dp_dpcd_write_byte(tunnel->aux, DP_REQUEST_BW, request_bw) < 0) { err = -EIO; goto out; } @@ -1196,13 +1196,13 @@ static int check_and_clear_status_change(struct drm_dp_tunnel *tunnel) u8 mask = DP_BW_ALLOCATION_CAPABILITY_CHANGED | DP_ESTIMATED_BW_CHANGED; u8 val; - if (drm_dp_dpcd_readb(tunnel->aux, DP_TUNNELING_STATUS, &val) < 0) + if (drm_dp_dpcd_read_byte(tunnel->aux, DP_TUNNELING_STATUS, &val) < 0) goto out_err; val &= mask; if (val) { - if (drm_dp_dpcd_writeb(tunnel->aux, DP_TUNNELING_STATUS, val) < 0) + if (drm_dp_dpcd_write_byte(tunnel->aux, DP_TUNNELING_STATUS, val) < 0) goto out_err; return 1; @@ -1215,7 +1215,7 @@ static int check_and_clear_status_change(struct drm_dp_tunnel *tunnel) * Check for estimated BW changes explicitly to account for lost * BW change notifications. */ - if (drm_dp_dpcd_readb(tunnel->aux, DP_ESTIMATED_BW, &val) < 0) + if (drm_dp_dpcd_read_byte(tunnel->aux, DP_ESTIMATED_BW, &val) < 0) goto out_err; if (val * tunnel->bw_granularity != tunnel->estimated_bw) @@ -1300,7 +1300,7 @@ int drm_dp_tunnel_handle_irq(struct drm_dp_tunnel_mgr *mgr, struct drm_dp_aux *a { u8 val; - if (drm_dp_dpcd_readb(aux, DP_TUNNELING_STATUS, &val) < 0) + if (drm_dp_dpcd_read_byte(aux, DP_TUNNELING_STATUS, &val) < 0) return -EIO; if (val & (DP_BW_REQUEST_SUCCEEDED | DP_BW_REQUEST_FAILED)) diff --git a/drivers/gpu/drm/display/drm_hdmi_helper.c b/drivers/gpu/drm/display/drm_hdmi_helper.c index 74dd4d01dd9b..855cb02b827d 100644 --- a/drivers/gpu/drm/display/drm_hdmi_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_helper.c @@ -256,3 +256,171 @@ drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode, return DIV_ROUND_CLOSEST_ULL(clock * bpc, 8); } EXPORT_SYMBOL(drm_hdmi_compute_mode_clock); + +struct drm_hdmi_acr_n_cts_entry { + unsigned int n; + unsigned int cts; +}; + +struct drm_hdmi_acr_data { + unsigned long tmds_clock_khz; + struct drm_hdmi_acr_n_cts_entry n_cts_32k, + n_cts_44k1, + n_cts_48k; +}; + +static const struct drm_hdmi_acr_data hdmi_acr_n_cts[] = { + { + /* "Other" entry */ + .n_cts_32k = { .n = 4096, }, + .n_cts_44k1 = { .n = 6272, }, + .n_cts_48k = { .n = 6144, }, + }, { + .tmds_clock_khz = 25175, + .n_cts_32k = { .n = 4576, .cts = 28125, }, + .n_cts_44k1 = { .n = 7007, .cts = 31250, }, + .n_cts_48k = { .n = 6864, .cts = 28125, }, + }, { + .tmds_clock_khz = 25200, + .n_cts_32k = { .n = 4096, .cts = 25200, }, + .n_cts_44k1 = { .n = 6272, .cts = 28000, }, + .n_cts_48k = { .n = 6144, .cts = 25200, }, + }, { + .tmds_clock_khz = 27000, + .n_cts_32k = { .n = 4096, .cts = 27000, }, + .n_cts_44k1 = { .n = 6272, .cts = 30000, }, + .n_cts_48k = { .n = 6144, .cts = 27000, }, + }, { + .tmds_clock_khz = 27027, + .n_cts_32k = { .n = 4096, .cts = 27027, }, + .n_cts_44k1 = { .n = 6272, .cts = 30030, }, + .n_cts_48k = { .n = 6144, .cts = 27027, }, + }, { + .tmds_clock_khz = 54000, + .n_cts_32k = { .n = 4096, .cts = 54000, }, + .n_cts_44k1 = { .n = 6272, .cts = 60000, }, + .n_cts_48k = { .n = 6144, .cts = 54000, }, + }, { + .tmds_clock_khz = 54054, + .n_cts_32k = { .n = 4096, .cts = 54054, }, + .n_cts_44k1 = { .n = 6272, .cts = 60060, }, + .n_cts_48k = { .n = 6144, .cts = 54054, }, + }, { + .tmds_clock_khz = 74176, + .n_cts_32k = { .n = 11648, .cts = 210937, }, /* and 210938 */ + .n_cts_44k1 = { .n = 17836, .cts = 234375, }, + .n_cts_48k = { .n = 11648, .cts = 140625, }, + }, { + .tmds_clock_khz = 74250, + .n_cts_32k = { .n = 4096, .cts = 74250, }, + .n_cts_44k1 = { .n = 6272, .cts = 82500, }, + .n_cts_48k = { .n = 6144, .cts = 74250, }, + }, { + .tmds_clock_khz = 148352, + .n_cts_32k = { .n = 11648, .cts = 421875, }, + .n_cts_44k1 = { .n = 8918, .cts = 234375, }, + .n_cts_48k = { .n = 5824, .cts = 140625, }, + }, { + .tmds_clock_khz = 148500, + .n_cts_32k = { .n = 4096, .cts = 148500, }, + .n_cts_44k1 = { .n = 6272, .cts = 165000, }, + .n_cts_48k = { .n = 6144, .cts = 148500, }, + }, { + .tmds_clock_khz = 296703, + .n_cts_32k = { .n = 5824, .cts = 421875, }, + .n_cts_44k1 = { .n = 4459, .cts = 234375, }, + .n_cts_48k = { .n = 5824, .cts = 281250, }, + }, { + .tmds_clock_khz = 297000, + .n_cts_32k = { .n = 3072, .cts = 222750, }, + .n_cts_44k1 = { .n = 4704, .cts = 247500, }, + .n_cts_48k = { .n = 5120, .cts = 247500, }, + }, { + .tmds_clock_khz = 593407, + .n_cts_32k = { .n = 5824, .cts = 843750, }, + .n_cts_44k1 = { .n = 8918, .cts = 937500, }, + .n_cts_48k = { .n = 5824, .cts = 562500, }, + }, { + .tmds_clock_khz = 594000, + .n_cts_32k = { .n = 3072, .cts = 445500, }, + .n_cts_44k1 = { .n = 9408, .cts = 990000, }, + .n_cts_48k = { .n = 6144, .cts = 594000, }, + }, +}; + +static int drm_hdmi_acr_find_tmds_entry(unsigned long tmds_clock_khz) +{ + int i; + + /* skip the "other" entry */ + for (i = 1; i < ARRAY_SIZE(hdmi_acr_n_cts); i++) { + if (hdmi_acr_n_cts[i].tmds_clock_khz == tmds_clock_khz) + return i; + } + + return 0; +} + +/** + * drm_hdmi_acr_get_n_cts() - get N and CTS values for Audio Clock Regeneration + * + * @tmds_char_rate: TMDS clock (char rate) as used by the HDMI connector + * @sample_rate: audio sample rate + * @out_n: a pointer to write the N value + * @out_cts: a pointer to write the CTS value + * + * Get the N and CTS values (either by calculating them or by returning data + * from the tables. This follows the HDMI 1.4b Section 7.2 "Audio Sample Clock + * Capture and Regeneration". + * + * Note, @sample_rate corresponds to the Fs value, see sections 7.2.4 - 7.2.6 + * on how to select Fs for non-L-PCM formats. + */ +void +drm_hdmi_acr_get_n_cts(unsigned long long tmds_char_rate, + unsigned int sample_rate, + unsigned int *out_n, + unsigned int *out_cts) +{ + /* be a bit more tolerant, especially for the 1.001 entries */ + unsigned long tmds_clock_khz = DIV_ROUND_CLOSEST_ULL(tmds_char_rate, 1000); + const struct drm_hdmi_acr_n_cts_entry *entry; + unsigned int n, cts, mult; + int tmds_idx; + + tmds_idx = drm_hdmi_acr_find_tmds_entry(tmds_clock_khz); + + /* + * Don't change the order, 192 kHz is divisible by 48k and 32k, but it + * should use 48k entry. + */ + if (sample_rate % 48000 == 0) { + entry = &hdmi_acr_n_cts[tmds_idx].n_cts_48k; + mult = sample_rate / 48000; + } else if (sample_rate % 44100 == 0) { + entry = &hdmi_acr_n_cts[tmds_idx].n_cts_44k1; + mult = sample_rate / 44100; + } else if (sample_rate % 32000 == 0) { + entry = &hdmi_acr_n_cts[tmds_idx].n_cts_32k; + mult = sample_rate / 32000; + } else { + entry = NULL; + } + + if (entry) { + n = entry->n * mult; + cts = entry->cts; + } else { + /* Recommended optimal value, HDMI 1.4b, Section 7.2.1 */ + n = 128 * sample_rate / 1000; + cts = 0; + } + + if (!cts) + cts = DIV_ROUND_CLOSEST_ULL(tmds_char_rate * n, + 128 * sample_rate); + + *out_n = n; + *out_cts = cts; +} +EXPORT_SYMBOL(drm_hdmi_acr_get_n_cts); diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index c205f37da1e1..d9d9948b29e9 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -9,6 +9,298 @@ #include #include +/** + * DOC: hdmi helpers + * + * These functions contain an implementation of the HDMI specification + * in the form of KMS helpers. + * + * It contains TMDS character rate computation, automatic selection of + * output formats, infoframes generation, etc. + * + * Infoframes Compliance + * ~~~~~~~~~~~~~~~~~~~~~ + * + * Drivers using the helpers will expose the various infoframes + * generated according to the HDMI specification in debugfs. + * + * Compliance can then be tested using ``edid-decode`` from the ``v4l-utils`` project + * (https://git.linuxtv.org/v4l-utils.git/). A sample run would look like: + * + * .. code-block:: bash + * + * # edid-decode \ + * -I /sys/kernel/debug/dri/1/HDMI-A-1/infoframes/audio \ + * -I /sys/kernel/debug/dri/1/HDMI-A-1/infoframes/avi \ + * -I /sys/kernel/debug/dri/1/HDMI-A-1/infoframes/hdmi \ + * -I /sys/kernel/debug/dri/1/HDMI-A-1/infoframes/hdr_drm \ + * -I /sys/kernel/debug/dri/1/HDMI-A-1/infoframes/spd \ + * /sys/class/drm/card1-HDMI-A-1/edid \ + * -c + * + * edid-decode (hex): + * + * 00 ff ff ff ff ff ff 00 1e 6d f4 5b 1e ef 06 00 + * 07 20 01 03 80 2f 34 78 ea 24 05 af 4f 42 ab 25 + * 0f 50 54 21 08 00 d1 c0 61 40 45 40 01 01 01 01 + * 01 01 01 01 01 01 98 d0 00 40 a1 40 d4 b0 30 20 + * 3a 00 d1 0b 12 00 00 1a 00 00 00 fd 00 3b 3d 1e + * b2 31 00 0a 20 20 20 20 20 20 00 00 00 fc 00 4c + * 47 20 53 44 51 48 44 0a 20 20 20 20 00 00 00 ff + * 00 32 30 37 4e 54 52 4c 44 43 34 33 30 0a 01 46 + * + * 02 03 42 72 23 09 07 07 4d 01 03 04 90 12 13 1f + * 22 5d 5e 5f 60 61 83 01 00 00 6d 03 0c 00 10 00 + * b8 3c 20 00 60 01 02 03 67 d8 5d c4 01 78 80 03 + * e3 0f 00 18 e2 00 6a e3 05 c0 00 e6 06 05 01 52 + * 52 51 11 5d 00 a0 a0 40 29 b0 30 20 3a 00 d1 0b + * 12 00 00 1a 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 c3 + * + * ---------------- + * + * Block 0, Base EDID: + * EDID Structure Version & Revision: 1.3 + * Vendor & Product Identification: + * Manufacturer: GSM + * Model: 23540 + * Serial Number: 454430 (0x0006ef1e) + * Made in: week 7 of 2022 + * Basic Display Parameters & Features: + * Digital display + * Maximum image size: 47 cm x 52 cm + * Gamma: 2.20 + * DPMS levels: Standby Suspend Off + * RGB color display + * First detailed timing is the preferred timing + * Color Characteristics: + * Red : 0.6835, 0.3105 + * Green: 0.2587, 0.6679 + * Blue : 0.1445, 0.0585 + * White: 0.3134, 0.3291 + * Established Timings I & II: + * DMT 0x04: 640x480 59.940476 Hz 4:3 31.469 kHz 25.175000 MHz + * DMT 0x09: 800x600 60.316541 Hz 4:3 37.879 kHz 40.000000 MHz + * DMT 0x10: 1024x768 60.003840 Hz 4:3 48.363 kHz 65.000000 MHz + * Standard Timings: + * DMT 0x52: 1920x1080 60.000000 Hz 16:9 67.500 kHz 148.500000 MHz + * DMT 0x10: 1024x768 60.003840 Hz 4:3 48.363 kHz 65.000000 MHz + * DMT 0x09: 800x600 60.316541 Hz 4:3 37.879 kHz 40.000000 MHz + * Detailed Timing Descriptors: + * DTD 1: 2560x2880 59.966580 Hz 8:9 185.417 kHz 534.000000 MHz (465 mm x 523 mm) + * Hfront 48 Hsync 32 Hback 240 Hpol P + * Vfront 3 Vsync 10 Vback 199 Vpol N + * Display Range Limits: + * Monitor ranges (GTF): 59-61 Hz V, 30-178 kHz H, max dotclock 490 MHz + * Display Product Name: 'LG SDQHD' + * Display Product Serial Number: '207NTRLDC430' + * Extension blocks: 1 + * Checksum: 0x46 + * + * ---------------- + * + * Block 1, CTA-861 Extension Block: + * Revision: 3 + * Basic audio support + * Supports YCbCr 4:4:4 + * Supports YCbCr 4:2:2 + * Native detailed modes: 2 + * Audio Data Block: + * Linear PCM: + * Max channels: 2 + * Supported sample rates (kHz): 48 44.1 32 + * Supported sample sizes (bits): 24 20 16 + * Video Data Block: + * VIC 1: 640x480 59.940476 Hz 4:3 31.469 kHz 25.175000 MHz + * VIC 3: 720x480 59.940060 Hz 16:9 31.469 kHz 27.000000 MHz + * VIC 4: 1280x720 60.000000 Hz 16:9 45.000 kHz 74.250000 MHz + * VIC 16: 1920x1080 60.000000 Hz 16:9 67.500 kHz 148.500000 MHz (native) + * VIC 18: 720x576 50.000000 Hz 16:9 31.250 kHz 27.000000 MHz + * VIC 19: 1280x720 50.000000 Hz 16:9 37.500 kHz 74.250000 MHz + * VIC 31: 1920x1080 50.000000 Hz 16:9 56.250 kHz 148.500000 MHz + * VIC 34: 1920x1080 30.000000 Hz 16:9 33.750 kHz 74.250000 MHz + * VIC 93: 3840x2160 24.000000 Hz 16:9 54.000 kHz 297.000000 MHz + * VIC 94: 3840x2160 25.000000 Hz 16:9 56.250 kHz 297.000000 MHz + * VIC 95: 3840x2160 30.000000 Hz 16:9 67.500 kHz 297.000000 MHz + * VIC 96: 3840x2160 50.000000 Hz 16:9 112.500 kHz 594.000000 MHz + * VIC 97: 3840x2160 60.000000 Hz 16:9 135.000 kHz 594.000000 MHz + * Speaker Allocation Data Block: + * FL/FR - Front Left/Right + * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: + * Source physical address: 1.0.0.0 + * Supports_AI + * DC_36bit + * DC_30bit + * DC_Y444 + * Maximum TMDS clock: 300 MHz + * Extended HDMI video details: + * HDMI VICs: + * HDMI VIC 1: 3840x2160 30.000000 Hz 16:9 67.500 kHz 297.000000 MHz + * HDMI VIC 2: 3840x2160 25.000000 Hz 16:9 56.250 kHz 297.000000 MHz + * HDMI VIC 3: 3840x2160 24.000000 Hz 16:9 54.000 kHz 297.000000 MHz + * Vendor-Specific Data Block (HDMI Forum), OUI C4-5D-D8: + * Version: 1 + * Maximum TMDS Character Rate: 600 MHz + * SCDC Present + * Supports 12-bits/component Deep Color 4:2:0 Pixel Encoding + * Supports 10-bits/component Deep Color 4:2:0 Pixel Encoding + * YCbCr 4:2:0 Capability Map Data Block: + * VIC 96: 3840x2160 50.000000 Hz 16:9 112.500 kHz 594.000000 MHz + * VIC 97: 3840x2160 60.000000 Hz 16:9 135.000 kHz 594.000000 MHz + * Video Capability Data Block: + * YCbCr quantization: No Data + * RGB quantization: Selectable (via AVI Q) + * PT scan behavior: Always Underscanned + * IT scan behavior: Always Underscanned + * CE scan behavior: Always Underscanned + * Colorimetry Data Block: + * BT2020YCC + * BT2020RGB + * HDR Static Metadata Data Block: + * Electro optical transfer functions: + * Traditional gamma - SDR luminance range + * SMPTE ST2084 + * Supported static metadata descriptors: + * Static metadata type 1 + * Desired content max luminance: 82 (295.365 cd/m^2) + * Desired content max frame-average luminance: 82 (295.365 cd/m^2) + * Desired content min luminance: 81 (0.298 cd/m^2) + * Detailed Timing Descriptors: + * DTD 2: 2560x2880 29.986961 Hz 8:9 87.592 kHz 238.250000 MHz (465 mm x 523 mm) + * Hfront 48 Hsync 32 Hback 80 Hpol P + * Vfront 3 Vsync 10 Vback 28 Vpol N + * Checksum: 0xc3 Unused space in Extension Block: 43 bytes + * + * ---------------- + * + * edid-decode 1.29.0-5346 + * edid-decode SHA: c363e9aa6d70 2025-03-11 11:41:18 + * + * Warnings: + * + * Block 1, CTA-861 Extension Block: + * IT Video Formats are overscanned by default, but normally this should be underscanned. + * Video Data Block: VIC 1 and the first DTD are not identical. Is this intended? + * Video Data Block: All VICs are in ascending order, and the first (preferred) VIC <= 4, is that intended? + * Video Capability Data Block: Set Selectable YCbCr Quantization to avoid interop issues. + * Video Capability Data Block: S_PT is equal to S_IT and S_CE, so should be set to 0 instead. + * Colorimetry Data Block: Set the sRGB colorimetry bit to avoid interop issues. + * Display Product Serial Number is set, so the Serial Number in the Base EDID should be 0. + * EDID: + * Base EDID: Some timings are out of range of the Monitor Ranges: + * Vertical Freq: 24.000 - 60.317 Hz (Monitor: 59.000 - 61.000 Hz) + * Horizontal Freq: 31.250 - 185.416 kHz (Monitor: 30.000 - 178.000 kHz) + * Maximum Clock: 594.000 MHz (Monitor: 490.000 MHz) + * + * Failures: + * + * Block 1, CTA-861 Extension Block: + * Video Capability Data Block: IT video formats are always underscanned, but bit 7 of Byte 3 of the CTA-861 Extension header is set to overscanned. + * EDID: + * CTA-861: Native progressive timings are a mix of several resolutions. + * + * EDID conformity: FAIL + * + * ================ + * + * InfoFrame of '/sys/kernel/debug/dri/1/HDMI-A-1/infoframes/audio' was empty. + * + * ================ + * + * edid-decode InfoFrame (hex): + * + * 82 02 0d 31 12 28 04 00 00 00 00 00 00 00 00 00 + * 00 + * + * ---------------- + * + * HDMI InfoFrame Checksum: 0x31 + * + * AVI InfoFrame + * Version: 2 + * Length: 13 + * Y: Color Component Sample Format: RGB + * A: Active Format Information Present: Yes + * B: Bar Data Present: Bar Data not present + * S: Scan Information: Composed for an underscanned display + * C: Colorimetry: No Data + * M: Picture Aspect Ratio: 16:9 + * R: Active Portion Aspect Ratio: 8 + * ITC: IT Content: No Data + * EC: Extended Colorimetry: xvYCC601 + * Q: RGB Quantization Range: Limited Range + * SC: Non-Uniform Picture Scaling: No Known non-uniform scaling + * YQ: YCC Quantization Range: Limited Range + * CN: IT Content Type: Graphics + * PR: Pixel Data Repetition Count: 0 + * Line Number of End of Top Bar: 0 + * Line Number of Start of Bottom Bar: 0 + * Pixel Number of End of Left Bar: 0 + * Pixel Number of Start of Right Bar: 0 + * + * ---------------- + * + * AVI InfoFrame conformity: PASS + * + * ================ + * + * edid-decode InfoFrame (hex): + * + * 81 01 05 49 03 0c 00 20 01 + * + * ---------------- + * + * HDMI InfoFrame Checksum: 0x49 + * + * Vendor-Specific InfoFrame (HDMI), OUI 00-0C-03 + * Version: 1 + * Length: 5 + * HDMI Video Format: HDMI_VIC is present + * HDMI VIC 1: 3840x2160 30.000000 Hz 16:9 67.500 kHz 297.000000 MHz + * + * ---------------- + * + * Vendor-Specific InfoFrame (HDMI), OUI 00-0C-03 conformity: PASS + * + * ================ + * + * InfoFrame of '/sys/kernel/debug/dri/1/HDMI-A-1/infoframes/hdr_drm' was empty. + * + * ================ + * + * edid-decode InfoFrame (hex): + * + * 83 01 19 93 42 72 6f 61 64 63 6f 6d 56 69 64 65 + * 6f 63 6f 72 65 00 00 00 00 00 00 00 09 + * + * ---------------- + * + * HDMI InfoFrame Checksum: 0x93 + * + * Source Product Description InfoFrame + * Version: 1 + * Length: 25 + * Vendor Name: 'Broadcom' + * Product Description: 'Videocore' + * Source Information: PC general + * + * ---------------- + * + * Source Product Description InfoFrame conformity: PASS + * + * Testing + * ~~~~~~~ + * + * The helpers have unit testing and can be tested using kunit with: + * + * .. code-block:: bash + * + * $ ./tools/testing/kunit/kunit.py run \ + * --kunitconfig=drivers/gpu/drm/tests \ + * drm_atomic_helper_connector_hdmi_* + */ + /** * __drm_atomic_helper_connector_hdmi_reset() - Initializes all HDMI @drm_connector_state resources * @connector: DRM connector @@ -816,7 +1108,7 @@ drm_atomic_helper_connector_hdmi_update(struct drm_connector *connector, * @status: Connection status * * This function should be called as a part of the .detect() / .detect_ctx() - * callbacks, updating the HDMI-specific connector's data. + * callbacks for all status changes. */ void drm_atomic_helper_connector_hdmi_hotplug(struct drm_connector *connector, enum drm_connector_status status) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 9ea2611770f4..0138cf0b8b63 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -933,6 +933,9 @@ EXPORT_SYMBOL(drm_atomic_get_new_private_obj_state); * state). This is especially true in enable hooks because the pipeline has * changed. * + * If you don't have access to the atomic state, see + * drm_atomic_get_connector_for_encoder(). + * * Returns: The old connector connected to @encoder, or NULL if the encoder is * not connected. */ @@ -967,6 +970,9 @@ EXPORT_SYMBOL(drm_atomic_get_old_connector_for_encoder); * attached to @encoder vs ones that do (and to inspect their state). This is * especially true in disable hooks because the pipeline will change. * + * If you don't have access to the atomic state, see + * drm_atomic_get_connector_for_encoder(). + * * Returns: The new connector connected to @encoder, or NULL if the encoder is * not connected. */ @@ -987,6 +993,59 @@ drm_atomic_get_new_connector_for_encoder(const struct drm_atomic_state *state, } EXPORT_SYMBOL(drm_atomic_get_new_connector_for_encoder); +/** + * drm_atomic_get_connector_for_encoder - Get connector currently assigned to an encoder + * @encoder: The encoder to find the connector of + * @ctx: Modeset locking context + * + * This function finds and returns the connector currently assigned to + * an @encoder. + * + * It is similar to the drm_atomic_get_old_connector_for_encoder() and + * drm_atomic_get_new_connector_for_encoder() helpers, but doesn't + * require access to the atomic state. If you have access to it, prefer + * using these. This helper is typically useful in situations where you + * don't have access to the atomic state, like detect, link repair, + * threaded interrupt handlers, or hooks from other frameworks (ALSA, + * CEC, etc.). + * + * Returns: + * The connector connected to @encoder, or an error pointer otherwise. + * When the error is EDEADLK, a deadlock has been detected and the + * sequence must be restarted. + */ +struct drm_connector * +drm_atomic_get_connector_for_encoder(const struct drm_encoder *encoder, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_connector_list_iter conn_iter; + struct drm_connector *out_connector = ERR_PTR(-EINVAL); + struct drm_connector *connector; + struct drm_device *dev = encoder->dev; + int ret; + + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx); + if (ret) + return ERR_PTR(ret); + + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (!connector->state) + continue; + + if (encoder == connector->state->best_encoder) { + out_connector = connector; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + drm_modeset_unlock(&dev->mode_config.connection_mutex); + + return out_connector; +} +EXPORT_SYMBOL(drm_atomic_get_connector_for_encoder); + + /** * drm_atomic_get_old_crtc_for_encoder - Get old crtc for an encoder * @state: Atomic state diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 5302ab324898..ee64ca1b1bec 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3409,6 +3409,9 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_all); * This implies a reset of all active components available between the CRTC and * connectors. * + * A variant of this function exists with + * drm_bridge_helper_reset_crtc(), dedicated to bridges. + * * NOTE: This relies on resetting &drm_crtc_state.connectors_changed. * For drivers which optimize out unnecessary modesets this will result in * a no-op commit, achieving nothing. diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 6e74de833466..6852d73c931c 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -75,6 +75,12 @@ * the currently visible vertical area of the &drm_crtc. * FB_ID: * Mode object ID of the &drm_framebuffer this plane should scan out. + * + * When a KMS client is performing front-buffer rendering, it should set + * FB_ID to the same front-buffer FB on each atomic commit. This implies + * to the driver that it needs to re-read the same FB again. Otherwise + * drivers which do not employ continuously repeated scanout cycles might + * not update the screen. * CRTC_ID: * Mode object ID of the &drm_crtc this plane should be connected to. * diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index fa2794217a90..b4c89ec01998 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -21,6 +21,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include #include #include @@ -198,10 +199,96 @@ static DEFINE_MUTEX(bridge_lock); static LIST_HEAD(bridge_list); +static void __drm_bridge_free(struct kref *kref) +{ + struct drm_bridge *bridge = container_of(kref, struct drm_bridge, refcount); + + kfree(bridge->container); +} + +/** + * drm_bridge_get - Acquire a bridge reference + * @bridge: DRM bridge + * + * This function increments the bridge's refcount. + * + * Returns: + * Pointer to @bridge. + */ +struct drm_bridge *drm_bridge_get(struct drm_bridge *bridge) +{ + if (bridge) + kref_get(&bridge->refcount); + + return bridge; +} +EXPORT_SYMBOL(drm_bridge_get); + +/** + * drm_bridge_put - Release a bridge reference + * @bridge: DRM bridge + * + * This function decrements the bridge's reference count and frees the + * object if the reference count drops to zero. + */ +void drm_bridge_put(struct drm_bridge *bridge) +{ + if (bridge) + kref_put(&bridge->refcount, __drm_bridge_free); +} +EXPORT_SYMBOL(drm_bridge_put); + +/** + * drm_bridge_put_void - wrapper to drm_bridge_put() taking a void pointer + * + * @data: pointer to @struct drm_bridge, cast to a void pointer + * + * Wrapper of drm_bridge_put() to be used when a function taking a void + * pointer is needed, for example as a devm action. + */ +static void drm_bridge_put_void(void *data) +{ + struct drm_bridge *bridge = (struct drm_bridge *)data; + + drm_bridge_put(bridge); +} + +void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, + const struct drm_bridge_funcs *funcs) +{ + void *container; + struct drm_bridge *bridge; + int err; + + if (!funcs) { + dev_warn(dev, "Missing funcs pointer\n"); + return ERR_PTR(-EINVAL); + } + + container = kzalloc(size, GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + bridge = container + offset; + bridge->container = container; + bridge->funcs = funcs; + kref_init(&bridge->refcount); + + err = devm_add_action_or_reset(dev, drm_bridge_put_void, bridge); + if (err) + return ERR_PTR(err); + + return container; +} +EXPORT_SYMBOL(__devm_drm_bridge_alloc); + /** * drm_bridge_add - add the given bridge to the global bridge list * * @bridge: bridge control structure + * + * The bridge to be added must have been allocated by + * devm_drm_bridge_alloc(). */ void drm_bridge_add(struct drm_bridge *bridge) { @@ -280,6 +367,11 @@ static const struct drm_private_state_funcs drm_bridge_priv_state_funcs = { .atomic_destroy_state = drm_bridge_atomic_destroy_priv_state, }; +static bool drm_bridge_is_atomic(struct drm_bridge *bridge) +{ + return bridge->funcs->atomic_reset != NULL; +} + /** * drm_bridge_attach - attach the bridge to an encoder's chain * @@ -327,12 +419,12 @@ int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge, list_add(&bridge->chain_node, &encoder->bridge_chain); if (bridge->funcs->attach) { - ret = bridge->funcs->attach(bridge, flags); + ret = bridge->funcs->attach(bridge, encoder, flags); if (ret < 0) goto err_reset_bridge; } - if (bridge->funcs->atomic_reset) { + if (drm_bridge_is_atomic(bridge)) { struct drm_bridge_state *state; state = bridge->funcs->atomic_reset(bridge); @@ -377,7 +469,7 @@ void drm_bridge_detach(struct drm_bridge *bridge) if (WARN_ON(!bridge->dev)) return; - if (bridge->funcs->atomic_reset) + if (drm_bridge_is_atomic(bridge)) drm_atomic_private_obj_fini(&bridge->base); if (bridge->funcs->detach) @@ -1300,6 +1392,75 @@ struct drm_bridge *of_drm_find_bridge(struct device_node *np) EXPORT_SYMBOL(of_drm_find_bridge); #endif +static void drm_bridge_debugfs_show_bridge(struct drm_printer *p, + struct drm_bridge *bridge, + unsigned int idx) +{ + drm_printf(p, "bridge[%u]: %ps\n", idx, bridge->funcs); + drm_printf(p, "\ttype: [%d] %s\n", + bridge->type, + drm_get_connector_type_name(bridge->type)); + + if (bridge->of_node) + drm_printf(p, "\tOF: %pOFfc\n", bridge->of_node); + + drm_printf(p, "\tops: [0x%x]", bridge->ops); + if (bridge->ops & DRM_BRIDGE_OP_DETECT) + drm_puts(p, " detect"); + if (bridge->ops & DRM_BRIDGE_OP_EDID) + drm_puts(p, " edid"); + if (bridge->ops & DRM_BRIDGE_OP_HPD) + drm_puts(p, " hpd"); + if (bridge->ops & DRM_BRIDGE_OP_MODES) + drm_puts(p, " modes"); + if (bridge->ops & DRM_BRIDGE_OP_HDMI) + drm_puts(p, " hdmi"); + drm_puts(p, "\n"); +} + +static int allbridges_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_bridge *bridge; + unsigned int idx = 0; + + mutex_lock(&bridge_lock); + + list_for_each_entry(bridge, &bridge_list, list) + drm_bridge_debugfs_show_bridge(&p, bridge, idx++); + + mutex_unlock(&bridge_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(allbridges); + +static int encoder_bridges_show(struct seq_file *m, void *data) +{ + struct drm_encoder *encoder = m->private; + struct drm_printer p = drm_seq_file_printer(m); + struct drm_bridge *bridge; + unsigned int idx = 0; + + drm_for_each_bridge_in_chain(encoder, bridge) + drm_bridge_debugfs_show_bridge(&p, bridge, idx++); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(encoder_bridges); + +void drm_bridge_debugfs_params(struct dentry *root) +{ + debugfs_create_file("bridges", 0444, root, NULL, &allbridges_fops); +} + +void drm_bridge_debugfs_encoder_params(struct dentry *root, + struct drm_encoder *encoder) +{ + /* bridges list */ + debugfs_create_file("bridges", 0444, root, encoder, &encoder_bridges_fops); +} + MODULE_AUTHOR("Ajay Kumar "); MODULE_DESCRIPTION("DRM bridge infrastructure"); MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/drm_bridge_helper.c b/drivers/gpu/drm/drm_bridge_helper.c new file mode 100644 index 000000000000..af80d2496194 --- /dev/null +++ b/drivers/gpu/drm/drm_bridge_helper.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include + +/** + * drm_bridge_helper_reset_crtc - Reset the pipeline feeding a bridge + * @bridge: DRM bridge to reset + * @ctx: lock acquisition context + * + * Reset a @bridge pipeline. It will power-cycle all active components + * between the CRTC and connector that bridge is connected to. + * + * As it relies on drm_atomic_helper_reset_crtc(), the same limitations + * apply. + * + * Returns: + * + * 0 on success or a negative error code on failure. If the error + * returned is EDEADLK, the whole atomic sequence must be restarted. + */ +int drm_bridge_helper_reset_crtc(struct drm_bridge *bridge, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_connector *connector; + struct drm_encoder *encoder = bridge->encoder; + struct drm_device *dev = encoder->dev; + struct drm_crtc *crtc; + int ret; + + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx); + if (ret) + return ret; + + connector = drm_atomic_get_connector_for_encoder(encoder, ctx); + if (IS_ERR(connector)) { + ret = PTR_ERR(connector); + goto out; + } + + if (!connector->state) { + ret = -EINVAL; + goto out; + } + + crtc = connector->state->crtc; + ret = drm_atomic_helper_reset_crtc(crtc, ctx); + if (ret) + goto out; + +out: + drm_modeset_unlock(&dev->mode_config.connection_mutex); + return ret; +} +EXPORT_SYMBOL(drm_bridge_helper_reset_crtc); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 549b28a5918c..f1de7faf9fb4 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -174,7 +174,7 @@ EXPORT_SYMBOL(drm_client_release); static void drm_client_buffer_delete(struct drm_client_buffer *buffer) { if (buffer->gem) { - drm_gem_vunmap_unlocked(buffer->gem, &buffer->map); + drm_gem_vunmap(buffer->gem, &buffer->map); drm_gem_object_put(buffer->gem); } @@ -252,7 +252,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, drm_gem_lock(gem); - ret = drm_gem_vmap(gem, map); + ret = drm_gem_vmap_locked(gem, map); if (ret) goto err_drm_gem_vmap_unlocked; *map_copy = *map; @@ -278,7 +278,7 @@ void drm_client_buffer_vunmap_local(struct drm_client_buffer *buffer) struct drm_gem_object *gem = buffer->gem; struct iosys_map *map = &buffer->map; - drm_gem_vunmap(gem, map); + drm_gem_vunmap_locked(gem, map); drm_gem_unlock(gem); } EXPORT_SYMBOL(drm_client_buffer_vunmap_local); @@ -316,7 +316,7 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer, ret = drm_gem_pin_locked(gem); if (ret) goto err_drm_gem_pin_locked; - ret = drm_gem_vmap(gem, map); + ret = drm_gem_vmap_locked(gem, map); if (ret) goto err_drm_gem_vmap; @@ -348,7 +348,7 @@ void drm_client_buffer_vunmap(struct drm_client_buffer *buffer) struct iosys_map *map = &buffer->map; drm_gem_lock(gem); - drm_gem_vunmap(gem, map); + drm_gem_vunmap_locked(gem, map); drm_gem_unpin_locked(gem); drm_gem_unlock(gem); } diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index aca442c25209..0f9d5ba36c81 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -39,7 +39,7 @@ int drm_client_modeset_create(struct drm_client_dev *client) unsigned int max_connector_count = 1; struct drm_mode_set *modeset; struct drm_crtc *crtc; - unsigned int i = 0; + int i = 0; /* Add terminating zero entry to enable index less iteration */ client->modesets = kcalloc(num_crtc + 1, sizeof(*client->modesets), GFP_KERNEL); @@ -73,9 +73,10 @@ err_free: static void drm_client_modeset_release(struct drm_client_dev *client) { struct drm_mode_set *modeset; - unsigned int i; drm_client_for_each_modeset(modeset, client) { + int i; + drm_mode_destroy(client->dev, modeset->mode); modeset->mode = NULL; modeset->fb = NULL; @@ -117,10 +118,10 @@ drm_client_find_modeset(struct drm_client_dev *client, struct drm_crtc *crtc) return NULL; } -static struct drm_display_mode * +static const struct drm_display_mode * drm_connector_get_tiled_mode(struct drm_connector *connector) { - struct drm_display_mode *mode; + const struct drm_display_mode *mode; list_for_each_entry(mode, &connector->modes, head) { if (mode->hdisplay == connector->tile_h_size && @@ -130,10 +131,10 @@ drm_connector_get_tiled_mode(struct drm_connector *connector) return NULL; } -static struct drm_display_mode * +static const struct drm_display_mode * drm_connector_fallback_non_tiled_mode(struct drm_connector *connector) { - struct drm_display_mode *mode; + const struct drm_display_mode *mode; list_for_each_entry(mode, &connector->modes, head) { if (mode->hdisplay == connector->tile_h_size && @@ -144,10 +145,10 @@ drm_connector_fallback_non_tiled_mode(struct drm_connector *connector) return NULL; } -static struct drm_display_mode * +static const struct drm_display_mode * drm_connector_preferred_mode(struct drm_connector *connector, int width, int height) { - struct drm_display_mode *mode; + const struct drm_display_mode *mode; list_for_each_entry(mode, &connector->modes, head) { if (mode->hdisplay > width || @@ -159,16 +160,18 @@ drm_connector_preferred_mode(struct drm_connector *connector, int width, int hei return NULL; } -static struct drm_display_mode *drm_connector_first_mode(struct drm_connector *connector) +static const struct drm_display_mode * +drm_connector_first_mode(struct drm_connector *connector) { return list_first_entry_or_null(&connector->modes, struct drm_display_mode, head); } -static struct drm_display_mode *drm_connector_pick_cmdline_mode(struct drm_connector *connector) +static const struct drm_display_mode * +drm_connector_pick_cmdline_mode(struct drm_connector *connector) { - struct drm_cmdline_mode *cmdline_mode; - struct drm_display_mode *mode; + const struct drm_cmdline_mode *cmdline_mode; + const struct drm_display_mode *mode; bool prefer_non_interlace; /* @@ -237,9 +240,9 @@ static bool drm_connector_enabled(struct drm_connector *connector, bool strict) return enable; } -static void drm_client_connectors_enabled(struct drm_connector **connectors, +static void drm_client_connectors_enabled(struct drm_connector *connectors[], unsigned int connector_count, - bool *enabled) + bool enabled[]) { bool any_enabled = false; struct drm_connector *connector; @@ -263,16 +266,35 @@ static void drm_client_connectors_enabled(struct drm_connector **connectors, enabled[i] = drm_connector_enabled(connectors[i], false); } -static bool drm_client_target_cloned(struct drm_device *dev, - struct drm_connector **connectors, - unsigned int connector_count, - struct drm_display_mode **modes, - struct drm_client_offset *offsets, - bool *enabled, int width, int height) +static void mode_replace(struct drm_device *dev, + const struct drm_display_mode **dst, + const struct drm_display_mode *src) { - int count, i, j; + drm_mode_destroy(dev, (struct drm_display_mode *)*dst); + + *dst = src ? drm_mode_duplicate(dev, src) : NULL; +} + +static void modes_destroy(struct drm_device *dev, + const struct drm_display_mode *modes[], + int count) +{ + int i; + + for (i = 0; i < count; i++) + mode_replace(dev, &modes[i], NULL); +} + +static bool drm_client_target_cloned(struct drm_device *dev, + struct drm_connector *connectors[], + unsigned int connector_count, + const struct drm_display_mode *modes[], + struct drm_client_offset offsets[], + bool enabled[], int width, int height) +{ + int count, i; bool can_clone = false; - struct drm_display_mode *dmt_mode, *mode; + struct drm_display_mode *dmt_mode; /* only contemplate cloning in the single crtc case */ if (dev->mode_config.num_crtc > 1) @@ -291,9 +313,13 @@ static bool drm_client_target_cloned(struct drm_device *dev, /* check the command line or if nothing common pick 1024x768 */ can_clone = true; for (i = 0; i < connector_count; i++) { + int j; + if (!enabled[i]) continue; - modes[i] = drm_connector_pick_cmdline_mode(connectors[i]); + + mode_replace(dev, &modes[i], + drm_connector_pick_cmdline_mode(connectors[i])); if (!modes[i]) { can_clone = false; break; @@ -323,6 +349,8 @@ static bool drm_client_target_cloned(struct drm_device *dev, goto fail; for (i = 0; i < connector_count; i++) { + const struct drm_display_mode *mode; + if (!enabled[i]) continue; @@ -332,7 +360,7 @@ static bool drm_client_target_cloned(struct drm_device *dev, DRM_MODE_MATCH_CLOCK | DRM_MODE_MATCH_FLAGS | DRM_MODE_MATCH_3D_FLAGS)) - modes[i] = mode; + mode_replace(dev, &modes[i], mode); } if (!modes[i]) can_clone = false; @@ -349,19 +377,19 @@ fail: } static int drm_client_get_tile_offsets(struct drm_device *dev, - struct drm_connector **connectors, + struct drm_connector *connectors[], unsigned int connector_count, - struct drm_display_mode **modes, - struct drm_client_offset *offsets, + const struct drm_display_mode *modes[], + struct drm_client_offset offsets[], int idx, int h_idx, int v_idx) { - struct drm_connector *connector; int i; int hoffset = 0, voffset = 0; for (i = 0; i < connector_count; i++) { - connector = connectors[i]; + struct drm_connector *connector = connectors[i]; + if (!connector->has_tile) continue; @@ -384,14 +412,13 @@ static int drm_client_get_tile_offsets(struct drm_device *dev, } static bool drm_client_target_preferred(struct drm_device *dev, - struct drm_connector **connectors, + struct drm_connector *connectors[], unsigned int connector_count, - struct drm_display_mode **modes, - struct drm_client_offset *offsets, - bool *enabled, int width, int height) + const struct drm_display_mode *modes[], + struct drm_client_offset offsets[], + bool enabled[], int width, int height) { const u64 mask = BIT_ULL(connector_count) - 1; - struct drm_connector *connector; u64 conn_configured = 0; int tile_pass = 0; int num_tiled_conns = 0; @@ -405,7 +432,9 @@ static bool drm_client_target_preferred(struct drm_device *dev, retry: for (i = 0; i < connector_count; i++) { - connector = connectors[i]; + struct drm_connector *connector = connectors[i]; + const char *mode_type; + if (conn_configured & BIT_ULL(i)) continue; @@ -438,20 +467,23 @@ retry: modes, offsets, i, connector->tile_h_loc, connector->tile_v_loc); } - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] looking for cmdline mode\n", - connector->base.id, connector->name); - /* got for command line mode first */ - modes[i] = drm_connector_pick_cmdline_mode(connector); + mode_type = "cmdline"; + mode_replace(dev, &modes[i], + drm_connector_pick_cmdline_mode(connector)); + if (!modes[i]) { - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] looking for preferred mode, tile %d\n", - connector->base.id, connector->name, - connector->tile_group ? connector->tile_group->id : 0); - modes[i] = drm_connector_preferred_mode(connector, width, height); + mode_type = "preferred"; + mode_replace(dev, &modes[i], + drm_connector_preferred_mode(connector, width, height)); } - /* No preferred modes, pick one off the list */ - if (!modes[i]) - modes[i] = drm_connector_first_mode(connector); + + if (!modes[i]) { + mode_type = "first"; + mode_replace(dev, &modes[i], + drm_connector_first_mode(connector)); + } + /* * In case of tiled mode if all tiles not present fallback to * first available non tiled mode. @@ -466,18 +498,24 @@ retry: (connector->tile_h_loc == 0 && connector->tile_v_loc == 0 && !drm_connector_get_tiled_mode(connector))) { - drm_dbg_kms(dev, - "[CONNECTOR:%d:%s] Falling back to non-tiled mode\n", - connector->base.id, connector->name); - modes[i] = drm_connector_fallback_non_tiled_mode(connector); + mode_type = "non tiled"; + mode_replace(dev, &modes[i], + drm_connector_fallback_non_tiled_mode(connector)); } else { - modes[i] = drm_connector_get_tiled_mode(connector); + mode_type = "tiled"; + mode_replace(dev, &modes[i], + drm_connector_get_tiled_mode(connector)); } } - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Found mode %s\n", - connector->base.id, connector->name, - modes[i] ? modes[i]->name : "none"); + if (modes[i]) + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] found %s mode: %s\n", + connector->base.id, connector->name, + mode_type, modes[i]->name); + else + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] no mode found\n", + connector->base.id, connector->name); + conn_configured |= BIT_ULL(i); } @@ -502,18 +540,17 @@ static bool connector_has_possible_crtc(struct drm_connector *connector, } static int drm_client_pick_crtcs(struct drm_client_dev *client, - struct drm_connector **connectors, + struct drm_connector *connectors[], unsigned int connector_count, - struct drm_crtc **best_crtcs, - struct drm_display_mode **modes, + struct drm_crtc *best_crtcs[], + const struct drm_display_mode *modes[], int n, int width, int height) { struct drm_device *dev = client->dev; struct drm_connector *connector; int my_score, best_score, score; - struct drm_crtc **crtcs, *crtc; + struct drm_crtc **crtcs; struct drm_mode_set *modeset; - int o; if (n == connector_count) return 0; @@ -543,7 +580,8 @@ static int drm_client_pick_crtcs(struct drm_client_dev *client, * remaining connectors */ drm_client_for_each_modeset(modeset, client) { - crtc = modeset->crtc; + struct drm_crtc *crtc = modeset->crtc; + int o; if (!connector_has_possible_crtc(connector, crtc)) continue; @@ -577,17 +615,17 @@ static int drm_client_pick_crtcs(struct drm_client_dev *client, /* Try to read the BIOS display configuration and use it for the initial config */ static bool drm_client_firmware_config(struct drm_client_dev *client, - struct drm_connector **connectors, + struct drm_connector *connectors[], unsigned int connector_count, - struct drm_crtc **crtcs, - struct drm_display_mode **modes, - struct drm_client_offset *offsets, - bool *enabled, int width, int height) + struct drm_crtc *crtcs[], + const struct drm_display_mode *modes[], + struct drm_client_offset offsets[], + bool enabled[], int width, int height) { const int count = min_t(unsigned int, connector_count, BITS_PER_LONG); unsigned long conn_configured, conn_seq, mask; struct drm_device *dev = client->dev; - int i, j; + int i; bool *save_enabled; bool fallback = true, ret = true; int num_connectors_enabled = 0; @@ -621,11 +659,11 @@ static bool drm_client_firmware_config(struct drm_client_dev *client, retry: conn_seq = conn_configured; for (i = 0; i < count; i++) { - struct drm_connector *connector; + struct drm_connector *connector = connectors[i]; struct drm_encoder *encoder; - struct drm_crtc *new_crtc; - - connector = connectors[i]; + struct drm_crtc *crtc; + const char *mode_type; + int j; if (conn_configured & BIT(i)) continue; @@ -664,7 +702,7 @@ retry: num_connectors_enabled++; - new_crtc = connector->state->crtc; + crtc = connector->state->crtc; /* * Make sure we're not trying to drive multiple connectors @@ -672,69 +710,52 @@ retry: * match the BIOS. */ for (j = 0; j < count; j++) { - if (crtcs[j] == new_crtc) { - drm_dbg_kms(dev, "fallback: cloned configuration\n"); + if (crtcs[j] == crtc) { + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] fallback: cloned configuration\n", + connector->base.id, connector->name); goto bail; } } - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] looking for cmdline mode\n", - connector->base.id, connector->name); + mode_type = "cmdline"; + mode_replace(dev, &modes[i], + drm_connector_pick_cmdline_mode(connector)); - /* go for command line mode first */ - modes[i] = drm_connector_pick_cmdline_mode(connector); - - /* try for preferred next */ if (!modes[i]) { - drm_dbg_kms(dev, - "[CONNECTOR:%d:%s] looking for preferred mode, has tile: %s\n", - connector->base.id, connector->name, - str_yes_no(connector->has_tile)); - modes[i] = drm_connector_preferred_mode(connector, width, height); + mode_type = "preferred"; + mode_replace(dev, &modes[i], + drm_connector_preferred_mode(connector, width, height)); } - /* No preferred mode marked by the EDID? Are there any modes? */ - if (!modes[i] && !list_empty(&connector->modes)) { - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] using first listed mode\n", - connector->base.id, connector->name); - modes[i] = drm_connector_first_mode(connector); + if (!modes[i]) { + mode_type = "first"; + mode_replace(dev, &modes[i], + drm_connector_first_mode(connector)); } /* last resort: use current mode */ if (!modes[i]) { - /* - * IMPORTANT: We want to use the adjusted mode (i.e. - * after the panel fitter upscaling) as the initial - * config, not the input mode, which is what crtc->mode - * usually contains. But since our current - * code puts a mode derived from the post-pfit timings - * into crtc->mode this works out correctly. - * - * This is crtc->mode and not crtc->state->mode for the - * fastboot check to work correctly. - */ - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] looking for current mode\n", - connector->base.id, connector->name); - modes[i] = &connector->state->crtc->mode; + mode_type = "current"; + mode_replace(dev, &modes[i], + &crtc->state->mode); } + /* * In case of tiled modes, if all tiles are not present * then fallback to a non tiled mode. */ if (connector->has_tile && num_tiled_conns < connector->num_h_tile * connector->num_v_tile) { - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Falling back to non-tiled mode\n", - connector->base.id, connector->name); - modes[i] = drm_connector_fallback_non_tiled_mode(connector); + mode_type = "non tiled"; + mode_replace(dev, &modes[i], + drm_connector_fallback_non_tiled_mode(connector)); } - crtcs[i] = new_crtc; + crtcs[i] = crtc; - drm_dbg_kms(dev, "[CONNECTOR:%d:%s] on [CRTC:%d:%s]: %dx%d%s\n", + drm_dbg_kms(dev, "[CONNECTOR::%d:%s] on [CRTC:%d:%s] using %s mode: %s\n", connector->base.id, connector->name, - connector->state->crtc->base.id, - connector->state->crtc->name, - modes[i]->hdisplay, modes[i]->vdisplay, - modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" : ""); + crtc->base.id, crtc->name, + mode_type, modes[i]->name); fallback = false; conn_configured |= BIT(i); @@ -799,8 +820,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int total_modes_count = 0; struct drm_client_offset *offsets; unsigned int connector_count = 0; - /* points to modes protected by mode_config.mutex */ - struct drm_display_mode **modes; + const struct drm_display_mode **modes; struct drm_crtc **crtcs; int i, ret = 0; bool *enabled; @@ -851,7 +871,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, if (!drm_client_firmware_config(client, connectors, connector_count, crtcs, modes, offsets, enabled, width, height)) { - memset(modes, 0, connector_count * sizeof(*modes)); + modes_destroy(dev, modes, connector_count); memset(crtcs, 0, connector_count * sizeof(*crtcs)); memset(offsets, 0, connector_count * sizeof(*offsets)); @@ -868,10 +888,12 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, crtcs, modes, 0, width, height); } + mutex_unlock(&dev->mode_config.mutex); + drm_client_modeset_release(client); for (i = 0; i < connector_count; i++) { - struct drm_display_mode *mode = modes[i]; + const struct drm_display_mode *mode = modes[i]; struct drm_crtc *crtc = crtcs[i]; struct drm_client_offset *offset = &offsets[i]; @@ -902,11 +924,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, modeset->y = offset->y; } } - mutex_unlock(&dev->mode_config.mutex); mutex_unlock(&client->modeset_mutex); out: kfree(crtcs); + modes_destroy(dev, modes, connector_count); kfree(modes); kfree(offsets); kfree(enabled); @@ -938,7 +960,7 @@ bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation) struct drm_plane *plane = modeset->crtc->primary; struct drm_cmdline_mode *cmdline; u64 valid_mask = 0; - unsigned int i; + int i; if (!modeset->num_connectors) return false; @@ -1219,11 +1241,12 @@ static void drm_client_modeset_dpms_legacy(struct drm_client_dev *client, int dp struct drm_connector *connector; struct drm_mode_set *modeset; struct drm_modeset_acquire_ctx ctx; - int j; int ret; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret); drm_client_for_each_modeset(modeset, client) { + int j; + if (!modeset->crtc->enabled) continue; diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 0955f1c385dd..39497493f74c 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -334,7 +334,6 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, if (!encoder_funcs) continue; - encoder_funcs = encoder->helper_private; if (encoder_funcs->mode_fixup) { if (!(ret = encoder_funcs->mode_fixup(encoder, mode, adjusted_mode))) { diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 6b2178864c7e..3dfd8b34dceb 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -740,40 +740,6 @@ void drm_debugfs_crtc_remove(struct drm_crtc *crtc) crtc->debugfs_entry = NULL; } -static int bridges_show(struct seq_file *m, void *data) -{ - struct drm_encoder *encoder = m->private; - struct drm_printer p = drm_seq_file_printer(m); - struct drm_bridge *bridge; - unsigned int idx = 0; - - drm_for_each_bridge_in_chain(encoder, bridge) { - drm_printf(&p, "bridge[%u]: %ps\n", idx++, bridge->funcs); - drm_printf(&p, "\ttype: [%d] %s\n", - bridge->type, - drm_get_connector_type_name(bridge->type)); - - if (bridge->of_node) - drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node); - - drm_printf(&p, "\tops: [0x%x]", bridge->ops); - if (bridge->ops & DRM_BRIDGE_OP_DETECT) - drm_puts(&p, " detect"); - if (bridge->ops & DRM_BRIDGE_OP_EDID) - drm_puts(&p, " edid"); - if (bridge->ops & DRM_BRIDGE_OP_HPD) - drm_puts(&p, " hpd"); - if (bridge->ops & DRM_BRIDGE_OP_MODES) - drm_puts(&p, " modes"); - if (bridge->ops & DRM_BRIDGE_OP_HDMI) - drm_puts(&p, " hdmi"); - drm_puts(&p, "\n"); - } - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(bridges); - void drm_debugfs_encoder_add(struct drm_encoder *encoder) { struct drm_minor *minor = encoder->dev->primary; @@ -789,9 +755,7 @@ void drm_debugfs_encoder_add(struct drm_encoder *encoder) encoder->debugfs_entry = root; - /* bridges list */ - debugfs_create_file("bridges", 0444, root, encoder, - &bridges_fops); + drm_bridge_debugfs_encoder_params(root, encoder); if (encoder->funcs && encoder->funcs->debugfs_init) encoder->funcs->debugfs_init(encoder, root); diff --git a/drivers/gpu/drm/drm_displayid_internal.h b/drivers/gpu/drm/drm_displayid_internal.h index aee1b86a73c1..957dd0619f5c 100644 --- a/drivers/gpu/drm/drm_displayid_internal.h +++ b/drivers/gpu/drm/drm_displayid_internal.h @@ -66,6 +66,7 @@ struct drm_edid; #define DATA_BLOCK_2_STEREO_DISPLAY_INTERFACE 0x27 #define DATA_BLOCK_2_TILED_DISPLAY_TOPOLOGY 0x28 #define DATA_BLOCK_2_CONTAINER_ID 0x29 +#define DATA_BLOCK_2_TYPE_10_FORMULA_TIMING 0x2a #define DATA_BLOCK_2_VENDOR_SPECIFIC 0x7e #define DATA_BLOCK_2_CTA_DISPLAY_ID 0x81 @@ -114,20 +115,32 @@ struct displayid_tiled_block { struct displayid_detailed_timings_1 { u8 pixel_clock[3]; u8 flags; - u8 hactive[2]; - u8 hblank[2]; - u8 hsync[2]; - u8 hsw[2]; - u8 vactive[2]; - u8 vblank[2]; - u8 vsync[2]; - u8 vsw[2]; + __le16 hactive; + __le16 hblank; + __le16 hsync; + __le16 hsw; + __le16 vactive; + __le16 vblank; + __le16 vsync; + __le16 vsw; } __packed; struct displayid_detailed_timing_block { struct displayid_block base; struct displayid_detailed_timings_1 timings[]; -}; +} __packed; + +struct displayid_formula_timings_9 { + u8 flags; + __le16 hactive; + __le16 vactive; + u8 vrefresh; +} __packed; + +struct displayid_formula_timing_block { + struct displayid_block base; + struct displayid_formula_timings_9 timings[]; +} __packed; #define DISPLAYID_VESA_MSO_OVERLAP GENMASK(3, 0) #define DISPLAYID_VESA_MSO_MODE GENMASK(6, 5) diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c index 385eb5e10047..9dc0408fbbea 100644 --- a/drivers/gpu/drm/drm_draw.c +++ b/drivers/gpu/drm/drm_draw.c @@ -13,85 +13,7 @@ #include #include "drm_draw_internal.h" - -/* - * Conversions from xrgb8888 - */ - -static u16 convert_xrgb8888_to_rgb565(u32 pix) -{ - return ((pix & 0x00F80000) >> 8) | - ((pix & 0x0000FC00) >> 5) | - ((pix & 0x000000F8) >> 3); -} - -static u16 convert_xrgb8888_to_rgba5551(u32 pix) -{ - return ((pix & 0x00f80000) >> 8) | - ((pix & 0x0000f800) >> 5) | - ((pix & 0x000000f8) >> 2) | - BIT(0); /* set alpha bit */ -} - -static u16 convert_xrgb8888_to_xrgb1555(u32 pix) -{ - return ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); -} - -static u16 convert_xrgb8888_to_argb1555(u32 pix) -{ - return BIT(15) | /* set alpha bit */ - ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); -} - -static u32 convert_xrgb8888_to_argb8888(u32 pix) -{ - return pix | GENMASK(31, 24); /* fill alpha bits */ -} - -static u32 convert_xrgb8888_to_xbgr8888(u32 pix) -{ - return ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - ((pix & 0xff000000) >> 24) << 24; -} - -static u32 convert_xrgb8888_to_abgr8888(u32 pix) -{ - return ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - GENMASK(31, 24); /* fill alpha bits */ -} - -static u32 convert_xrgb8888_to_xrgb2101010(u32 pix) -{ - pix = ((pix & 0x000000FF) << 2) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x00FF0000) << 6); - return pix | ((pix >> 8) & 0x00300C03); -} - -static u32 convert_xrgb8888_to_argb2101010(u32 pix) -{ - pix = ((pix & 0x000000FF) << 2) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x00FF0000) << 6); - return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); -} - -static u32 convert_xrgb8888_to_abgr2101010(u32 pix) -{ - pix = ((pix & 0x00FF0000) >> 14) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x000000FF) << 22); - return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); -} +#include "drm_format_internal.h" /** * drm_draw_color_from_xrgb8888 - convert one pixel from xrgb8888 to the desired format @@ -106,28 +28,28 @@ u32 drm_draw_color_from_xrgb8888(u32 color, u32 format) { switch (format) { case DRM_FORMAT_RGB565: - return convert_xrgb8888_to_rgb565(color); + return drm_pixel_xrgb8888_to_rgb565(color); case DRM_FORMAT_RGBA5551: - return convert_xrgb8888_to_rgba5551(color); + return drm_pixel_xrgb8888_to_rgba5551(color); case DRM_FORMAT_XRGB1555: - return convert_xrgb8888_to_xrgb1555(color); + return drm_pixel_xrgb8888_to_xrgb1555(color); case DRM_FORMAT_ARGB1555: - return convert_xrgb8888_to_argb1555(color); + return drm_pixel_xrgb8888_to_argb1555(color); case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: return color; case DRM_FORMAT_ARGB8888: - return convert_xrgb8888_to_argb8888(color); + return drm_pixel_xrgb8888_to_argb8888(color); case DRM_FORMAT_XBGR8888: - return convert_xrgb8888_to_xbgr8888(color); + return drm_pixel_xrgb8888_to_xbgr8888(color); case DRM_FORMAT_ABGR8888: - return convert_xrgb8888_to_abgr8888(color); + return drm_pixel_xrgb8888_to_abgr8888(color); case DRM_FORMAT_XRGB2101010: - return convert_xrgb8888_to_xrgb2101010(color); + return drm_pixel_xrgb8888_to_xrgb2101010(color); case DRM_FORMAT_ARGB2101010: - return convert_xrgb8888_to_argb2101010(color); + return drm_pixel_xrgb8888_to_argb2101010(color); case DRM_FORMAT_ABGR2101010: - return convert_xrgb8888_to_abgr2101010(color); + return drm_pixel_xrgb8888_to_abgr2101010(color); default: WARN_ONCE(1, "Can't convert to %p4cc\n", &format); return 0; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 60e5ac179c15..56dd61f8e05a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -500,6 +501,25 @@ void drm_dev_unplug(struct drm_device *dev) } EXPORT_SYMBOL(drm_dev_unplug); +/** + * drm_dev_set_dma_dev - set the DMA device for a DRM device + * @dev: DRM device + * @dma_dev: DMA device or NULL + * + * Sets the DMA device of the given DRM device. Only required if + * the DMA device is different from the DRM device's parent. After + * calling this function, the DRM device holds a reference on + * @dma_dev. Pass NULL to clear the DMA device. + */ +void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev) +{ + dma_dev = get_device(dma_dev); + + put_device(dev->dma_dev); + dev->dma_dev = dma_dev; +} +EXPORT_SYMBOL(drm_dev_set_dma_dev); + /* * Available recovery methods for wedged device. To be sent along with device * wedged uevent. @@ -654,6 +674,8 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) { drm_fs_inode_free(dev->anon_inode); + put_device(dev->dma_dev); + dev->dma_dev = NULL; put_device(dev->dev); /* Prevent use-after-free in drm_managed_release when debugging is * enabled. Slightly awkward, but can't really be helped. */ @@ -807,6 +829,47 @@ void *__devm_drm_dev_alloc(struct device *parent, } EXPORT_SYMBOL(__devm_drm_dev_alloc); +/** + * __drm_dev_alloc - Allocation of a &drm_device instance + * @parent: Parent device object + * @driver: DRM driver + * @size: the size of the struct which contains struct drm_device + * @offset: the offset of the &drm_device within the container. + * + * This should *NOT* be by any drivers, but is a dedicated interface for the + * corresponding Rust abstraction. + * + * This is the same as devm_drm_dev_alloc(), but without the corresponding + * resource management through the parent device, but not the same as + * drm_dev_alloc(), since the latter is the deprecated version, which does not + * support subclassing. + * + * Returns: A pointer to new DRM device, or an ERR_PTR on failure. + */ +void *__drm_dev_alloc(struct device *parent, + const struct drm_driver *driver, + size_t size, size_t offset) +{ + void *container; + struct drm_device *drm; + int ret; + + container = kzalloc(size, GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + drm = container + offset; + ret = drm_dev_init(drm, driver, parent); + if (ret) { + kfree(container); + return ERR_PTR(ret); + } + drmm_add_final_kfree(drm, container); + + return container; +} +EXPORT_SYMBOL(__drm_dev_alloc); + /** * drm_dev_alloc - Allocate new DRM device * @driver: DRM driver to allocate device for @@ -822,22 +885,7 @@ EXPORT_SYMBOL(__devm_drm_dev_alloc); struct drm_device *drm_dev_alloc(const struct drm_driver *driver, struct device *parent) { - struct drm_device *dev; - int ret; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return ERR_PTR(-ENOMEM); - - ret = drm_dev_init(dev, driver, parent); - if (ret) { - kfree(dev); - return ERR_PTR(ret); - } - - drmm_add_final_kfree(dev, dev); - - return dev; + return __drm_dev_alloc(parent, driver, sizeof(struct drm_device), 0); } EXPORT_SYMBOL(drm_dev_alloc); @@ -1188,6 +1236,7 @@ static int __init drm_core_init(void) } drm_debugfs_root = debugfs_create_dir("dri", NULL); + drm_bridge_debugfs_params(drm_debugfs_root); ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); if (ret < 0) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 9edb3247c767..74e77742b2bd 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6761,23 +6761,23 @@ out: } static struct drm_display_mode *drm_mode_displayid_detailed(struct drm_device *dev, - struct displayid_detailed_timings_1 *timings, + const struct displayid_detailed_timings_1 *timings, bool type_7) { struct drm_display_mode *mode; - unsigned pixel_clock = (timings->pixel_clock[0] | - (timings->pixel_clock[1] << 8) | - (timings->pixel_clock[2] << 16)) + 1; - unsigned hactive = (timings->hactive[0] | timings->hactive[1] << 8) + 1; - unsigned hblank = (timings->hblank[0] | timings->hblank[1] << 8) + 1; - unsigned hsync = (timings->hsync[0] | (timings->hsync[1] & 0x7f) << 8) + 1; - unsigned hsync_width = (timings->hsw[0] | timings->hsw[1] << 8) + 1; - unsigned vactive = (timings->vactive[0] | timings->vactive[1] << 8) + 1; - unsigned vblank = (timings->vblank[0] | timings->vblank[1] << 8) + 1; - unsigned vsync = (timings->vsync[0] | (timings->vsync[1] & 0x7f) << 8) + 1; - unsigned vsync_width = (timings->vsw[0] | timings->vsw[1] << 8) + 1; - bool hsync_positive = (timings->hsync[1] >> 7) & 0x1; - bool vsync_positive = (timings->vsync[1] >> 7) & 0x1; + unsigned int pixel_clock = (timings->pixel_clock[0] | + (timings->pixel_clock[1] << 8) | + (timings->pixel_clock[2] << 16)) + 1; + unsigned int hactive = le16_to_cpu(timings->hactive) + 1; + unsigned int hblank = le16_to_cpu(timings->hblank) + 1; + unsigned int hsync = (le16_to_cpu(timings->hsync) & 0x7fff) + 1; + unsigned int hsync_width = le16_to_cpu(timings->hsw) + 1; + unsigned int vactive = le16_to_cpu(timings->vactive) + 1; + unsigned int vblank = le16_to_cpu(timings->vblank) + 1; + unsigned int vsync = (le16_to_cpu(timings->vsync) & 0x7fff) + 1; + unsigned int vsync_width = le16_to_cpu(timings->vsw) + 1; + bool hsync_positive = le16_to_cpu(timings->hsync) & (1 << 15); + bool vsync_positive = le16_to_cpu(timings->vsync) & (1 << 15); mode = drm_mode_create(dev); if (!mode) @@ -6834,6 +6834,66 @@ static int add_displayid_detailed_1_modes(struct drm_connector *connector, return num_modes; } +static struct drm_display_mode *drm_mode_displayid_formula(struct drm_device *dev, + const struct displayid_formula_timings_9 *timings, + bool type_10) +{ + struct drm_display_mode *mode; + u16 hactive = le16_to_cpu(timings->hactive) + 1; + u16 vactive = le16_to_cpu(timings->vactive) + 1; + u8 timing_formula = timings->flags & 0x7; + + /* TODO: support RB-v2 & RB-v3 */ + if (timing_formula > 1) + return NULL; + + /* TODO: support video-optimized refresh rate */ + if (timings->flags & (1 << 4)) + drm_dbg_kms(dev, "Fractional vrefresh is not implemented, proceeding with non-video-optimized refresh rate"); + + mode = drm_cvt_mode(dev, hactive, vactive, timings->vrefresh + 1, timing_formula == 1, false, false); + if (!mode) + return NULL; + + /* TODO: interpret S3D flags */ + + mode->type = DRM_MODE_TYPE_DRIVER; + drm_mode_set_name(mode); + + return mode; +} + +static int add_displayid_formula_modes(struct drm_connector *connector, + const struct displayid_block *block) +{ + const struct displayid_formula_timing_block *formula_block = (struct displayid_formula_timing_block *)block; + int num_timings; + struct drm_display_mode *newmode; + int num_modes = 0; + bool type_10 = block->tag == DATA_BLOCK_2_TYPE_10_FORMULA_TIMING; + int timing_size = 6 + ((formula_block->base.rev & 0x70) >> 4); + + /* extended blocks are not supported yet */ + if (timing_size != 6) + return 0; + + if (block->num_bytes % timing_size) + return 0; + + num_timings = block->num_bytes / timing_size; + for (int i = 0; i < num_timings; i++) { + const struct displayid_formula_timings_9 *timings = &formula_block->timings[i]; + + newmode = drm_mode_displayid_formula(connector->dev, timings, type_10); + if (!newmode) + continue; + + drm_mode_probed_add(connector, newmode); + num_modes++; + } + return num_modes; +} + static int add_displayid_detailed_modes(struct drm_connector *connector, const struct drm_edid *drm_edid) { @@ -6846,6 +6906,9 @@ static int add_displayid_detailed_modes(struct drm_connector *connector, if (block->tag == DATA_BLOCK_TYPE_1_DETAILED_TIMING || block->tag == DATA_BLOCK_2_TYPE_7_DETAILED_TIMING) num_modes += add_displayid_detailed_1_modes(connector, block); + else if (block->tag == DATA_BLOCK_2_TYPE_9_FORMULA_TIMING || + block->tag == DATA_BLOCK_2_TYPE_10_FORMULA_TIMING) + num_modes += add_displayid_formula_modes(connector, block); } displayid_iter_end(&iter); @@ -7100,18 +7163,12 @@ EXPORT_SYMBOL(drm_add_edid_modes); * Return: The number of modes added or 0 if we couldn't find any. */ int drm_add_modes_noedid(struct drm_connector *connector, - int hdisplay, int vdisplay) + unsigned int hdisplay, unsigned int vdisplay) { - int i, count, num_modes = 0; + int i, count = ARRAY_SIZE(drm_dmt_modes), num_modes = 0; struct drm_display_mode *mode; struct drm_device *dev = connector->dev; - count = ARRAY_SIZE(drm_dmt_modes); - if (hdisplay < 0) - hdisplay = 0; - if (vdisplay < 0) - vdisplay = 0; - for (i = 0; i < count; i++) { const struct drm_display_mode *ptr = &drm_dmt_modes[i]; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index cf2463090d3a..246cf845e2c9 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -992,6 +992,40 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) } EXPORT_SYMBOL(drm_show_fdinfo); +/** + * drm_file_err - log process name, pid and client_name associated with a drm_file + * @file_priv: context of interest for process name and pid + * @fmt: printf() like format string + * + * Helper function for clients which needs to log process details such + * as name and pid etc along with user logs. + */ +void drm_file_err(struct drm_file *file_priv, const char *fmt, ...) +{ + va_list args; + struct va_format vaf; + struct pid *pid; + struct task_struct *task; + struct drm_device *dev = file_priv->minor->dev; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + mutex_lock(&file_priv->client_name_lock); + rcu_read_lock(); + pid = rcu_dereference(file_priv->pid); + task = pid_task(pid, PIDTYPE_TGID); + + drm_err(dev, "comm: %s pid: %d client: %s ... %pV", task ? task->comm : "Unset", + task ? task->pid : 0, file_priv->client_name ?: "Unset", &vaf); + + va_end(args); + rcu_read_unlock(); + mutex_unlock(&file_priv->client_name_lock); +} +EXPORT_SYMBOL(drm_file_err); + /** * mock_drm_getfile - Create a new struct file for the drm device * @minor: drm minor to wrap (e.g. #drm_device.primary) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 01d3ab307ac3..d36e6cacc575 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -20,6 +20,8 @@ #include #include +#include "drm_format_internal.h" + /** * drm_format_conv_state_init - Initialize format-conversion state * @state: The state to initialize @@ -244,6 +246,152 @@ static int drm_fb_xfrm(struct iosys_map *dst, xfrm_line); } +#define ALIGN_DOWN_PIXELS(end, n, a) \ + ((end) - ((n) & ((a) - 1))) + +static __always_inline void drm_fb_xfrm_line_32to8(void *dbuf, const void *sbuf, + unsigned int pixels, + u32 (*xfrm_pixel)(u32)) +{ + __le32 *dbuf32 = dbuf; + u8 *dbuf8; + const __le32 *sbuf32 = sbuf; + const __le32 *send32 = sbuf32 + pixels; + + /* write 4 pixels at once */ + while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { + u32 pix[4] = { + le32_to_cpup(sbuf32), + le32_to_cpup(sbuf32 + 1), + le32_to_cpup(sbuf32 + 2), + le32_to_cpup(sbuf32 + 3), + }; + /* write output bytes in reverse order for little endianness */ + u32 val32 = xfrm_pixel(pix[0]) | + (xfrm_pixel(pix[1]) << 8) | + (xfrm_pixel(pix[2]) << 16) | + (xfrm_pixel(pix[3]) << 24); + *dbuf32++ = cpu_to_le32(val32); + sbuf32 += ARRAY_SIZE(pix); + } + + /* write trailing pixels */ + dbuf8 = (u8 __force *)dbuf32; + while (sbuf32 < send32) + *dbuf8++ = xfrm_pixel(le32_to_cpup(sbuf32++)); +} + +static __always_inline void drm_fb_xfrm_line_32to16(void *dbuf, const void *sbuf, + unsigned int pixels, + u32 (*xfrm_pixel)(u32)) +{ + __le64 *dbuf64 = dbuf; + __le32 *dbuf32; + __le16 *dbuf16; + const __le32 *sbuf32 = sbuf; + const __le32 *send32 = sbuf32 + pixels; + +#if defined(CONFIG_64BIT) + /* write 4 pixels at once */ + while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { + u32 pix[4] = { + le32_to_cpup(sbuf32), + le32_to_cpup(sbuf32 + 1), + le32_to_cpup(sbuf32 + 2), + le32_to_cpup(sbuf32 + 3), + }; + /* write output bytes in reverse order for little endianness */ + u64 val64 = ((u64)xfrm_pixel(pix[0])) | + ((u64)xfrm_pixel(pix[1]) << 16) | + ((u64)xfrm_pixel(pix[2]) << 32) | + ((u64)xfrm_pixel(pix[3]) << 48); + *dbuf64++ = cpu_to_le64(val64); + sbuf32 += ARRAY_SIZE(pix); + } +#endif + + /* write 2 pixels at once */ + dbuf32 = (__le32 __force *)dbuf64; + while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 2)) { + u32 pix[2] = { + le32_to_cpup(sbuf32), + le32_to_cpup(sbuf32 + 1), + }; + /* write output bytes in reverse order for little endianness */ + u32 val32 = xfrm_pixel(pix[0]) | + (xfrm_pixel(pix[1]) << 16); + *dbuf32++ = cpu_to_le32(val32); + sbuf32 += ARRAY_SIZE(pix); + } + + /* write trailing pixel */ + dbuf16 = (__le16 __force *)dbuf32; + while (sbuf32 < send32) + *dbuf16++ = cpu_to_le16(xfrm_pixel(le32_to_cpup(sbuf32++))); +} + +static __always_inline void drm_fb_xfrm_line_32to24(void *dbuf, const void *sbuf, + unsigned int pixels, + u32 (*xfrm_pixel)(u32)) +{ + __le32 *dbuf32 = dbuf; + u8 *dbuf8; + const __le32 *sbuf32 = sbuf; + const __le32 *send32 = sbuf32 + pixels; + + /* write pixels in chunks of 4 */ + while (sbuf32 < ALIGN_DOWN_PIXELS(send32, pixels, 4)) { + u32 val24[4] = { + xfrm_pixel(le32_to_cpup(sbuf32)), + xfrm_pixel(le32_to_cpup(sbuf32 + 1)), + xfrm_pixel(le32_to_cpup(sbuf32 + 2)), + xfrm_pixel(le32_to_cpup(sbuf32 + 3)), + }; + u32 out32[3] = { + /* write output bytes in reverse order for little endianness */ + ((val24[0] & 0x000000ff)) | + ((val24[0] & 0x0000ff00)) | + ((val24[0] & 0x00ff0000)) | + ((val24[1] & 0x000000ff) << 24), + ((val24[1] & 0x0000ff00) >> 8) | + ((val24[1] & 0x00ff0000) >> 8) | + ((val24[2] & 0x000000ff) << 16) | + ((val24[2] & 0x0000ff00) << 16), + ((val24[2] & 0x00ff0000) >> 16) | + ((val24[3] & 0x000000ff) << 8) | + ((val24[3] & 0x0000ff00) << 8) | + ((val24[3] & 0x00ff0000) << 8), + }; + + *dbuf32++ = cpu_to_le32(out32[0]); + *dbuf32++ = cpu_to_le32(out32[1]); + *dbuf32++ = cpu_to_le32(out32[2]); + sbuf32 += ARRAY_SIZE(val24); + } + + /* write trailing pixel */ + dbuf8 = (u8 __force *)dbuf32; + while (sbuf32 < send32) { + u32 val24 = xfrm_pixel(le32_to_cpup(sbuf32++)); + /* write output in reverse order for little endianness */ + *dbuf8++ = (val24 & 0x000000ff); + *dbuf8++ = (val24 & 0x0000ff00) >> 8; + *dbuf8++ = (val24 & 0x00ff0000) >> 16; + } +} + +static __always_inline void drm_fb_xfrm_line_32to32(void *dbuf, const void *sbuf, + unsigned int pixels, + u32 (*xfrm_pixel)(u32)) +{ + __le32 *dbuf32 = dbuf; + const __le32 *sbuf32 = sbuf; + const __le32 *send32 = sbuf32 + pixels; + + while (sbuf32 < send32) + *dbuf32++ = cpu_to_le32(xfrm_pixel(le32_to_cpup(sbuf32++))); +} + /** * drm_fb_memcpy - Copy clip buffer * @dst: Array of destination buffers @@ -368,17 +516,7 @@ EXPORT_SYMBOL(drm_fb_swab); static void drm_fb_xrgb8888_to_rgb332_line(void *dbuf, const void *sbuf, unsigned int pixels) { - u8 *dbuf8 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - dbuf8[x] = ((pix & 0x00e00000) >> 16) | - ((pix & 0x0000e000) >> 11) | - ((pix & 0x000000c0) >> 6); - } + drm_fb_xfrm_line_32to8(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb332); } /** @@ -417,38 +555,19 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb332); static void drm_fb_xrgb8888_to_rgb565_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le16 *dbuf16 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u16 val16; - u32 pix; + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb565); +} - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val16 = ((pix & 0x00F80000) >> 8) | - ((pix & 0x0000FC00) >> 5) | - ((pix & 0x000000F8) >> 3); - dbuf16[x] = cpu_to_le16(val16); - } +static __always_inline u32 drm_xrgb8888_to_rgb565_swab(u32 pix) +{ + return swab16(drm_pixel_xrgb8888_to_rgb565(pix)); } /* TODO: implement this helper as conversion to RGB565|BIG_ENDIAN */ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le16 *dbuf16 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u16 val16; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val16 = ((pix & 0x00F80000) >> 8) | - ((pix & 0x0000FC00) >> 5) | - ((pix & 0x000000F8) >> 3); - dbuf16[x] = cpu_to_le16(swab16(val16)); - } + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_xrgb8888_to_rgb565_swab); } /** @@ -495,19 +614,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le16 *dbuf16 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u16 val16; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val16 = ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); - dbuf16[x] = cpu_to_le16(val16); - } + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xrgb1555); } /** @@ -547,20 +654,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb1555); static void drm_fb_xrgb8888_to_argb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le16 *dbuf16 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u16 val16; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val16 = BIT(15) | /* set alpha bit */ - ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); - dbuf16[x] = cpu_to_le16(val16); - } + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb1555); } /** @@ -600,20 +694,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb1555); static void drm_fb_xrgb8888_to_rgba5551_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le16 *dbuf16 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u16 val16; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val16 = ((pix & 0x00f80000) >> 8) | - ((pix & 0x0000f800) >> 5) | - ((pix & 0x000000f8) >> 2) | - BIT(0); /* set alpha bit */ - dbuf16[x] = cpu_to_le16(val16); - } + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgba5551); } /** @@ -653,18 +734,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgba5551); static void drm_fb_xrgb8888_to_rgb888_line(void *dbuf, const void *sbuf, unsigned int pixels) { - u8 *dbuf8 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - /* write blue-green-red to output in little endianness */ - *dbuf8++ = (pix & 0x000000FF) >> 0; - *dbuf8++ = (pix & 0x0000FF00) >> 8; - *dbuf8++ = (pix & 0x00FF0000) >> 16; - } + drm_fb_xfrm_line_32to24(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb888); } /** @@ -704,18 +774,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb888); static void drm_fb_xrgb8888_to_bgr888_line(void *dbuf, const void *sbuf, unsigned int pixels) { - u8 *dbuf8 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - /* write red-green-blue to output in little endianness */ - *dbuf8++ = (pix & 0x00ff0000) >> 16; - *dbuf8++ = (pix & 0x0000ff00) >> 8; - *dbuf8++ = (pix & 0x000000ff) >> 0; - } + drm_fb_xfrm_line_32to24(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_bgr888); } /** @@ -755,16 +814,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_bgr888); static void drm_fb_xrgb8888_to_argb8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le32 *dbuf32 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - pix |= GENMASK(31, 24); /* fill alpha bits */ - dbuf32[x] = cpu_to_le32(pix); - } + drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb8888); } /** @@ -804,19 +854,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb8888); static void drm_fb_xrgb8888_to_abgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le32 *dbuf32 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - pix = ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - GENMASK(31, 24); /* fill alpha bits */ - *dbuf32++ = cpu_to_le32(pix); - } + drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_abgr8888); } static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, @@ -835,19 +873,7 @@ static void drm_fb_xrgb8888_to_abgr8888(struct iosys_map *dst, const unsigned in static void drm_fb_xrgb8888_to_xbgr8888_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le32 *dbuf32 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - pix = ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - ((pix & 0xff000000) >> 24) << 24; - *dbuf32++ = cpu_to_le32(pix); - } + drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xbgr8888); } static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned int *dst_pitch, @@ -866,20 +892,7 @@ static void drm_fb_xrgb8888_to_xbgr8888(struct iosys_map *dst, const unsigned in static void drm_fb_xrgb8888_to_xrgb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le32 *dbuf32 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 val32; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val32 = ((pix & 0x000000FF) << 2) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x00FF0000) << 6); - pix = val32 | ((val32 >> 8) & 0x00300C03); - *dbuf32++ = cpu_to_le32(pix); - } + drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_xrgb2101010); } /** @@ -920,21 +933,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_xrgb2101010); static void drm_fb_xrgb8888_to_argb2101010_line(void *dbuf, const void *sbuf, unsigned int pixels) { - __le32 *dbuf32 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 val32; - u32 pix; - - for (x = 0; x < pixels; x++) { - pix = le32_to_cpu(sbuf32[x]); - val32 = ((pix & 0x000000ff) << 2) | - ((pix & 0x0000ff00) << 4) | - ((pix & 0x00ff0000) << 6); - pix = GENMASK(31, 30) | /* set alpha bits */ - val32 | ((val32 >> 8) & 0x00300c03); - *dbuf32++ = cpu_to_le32(pix); - } + drm_fb_xfrm_line_32to32(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_argb2101010); } /** @@ -975,19 +974,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_argb2101010); static void drm_fb_xrgb8888_to_gray8_line(void *dbuf, const void *sbuf, unsigned int pixels) { - u8 *dbuf8 = dbuf; - const __le32 *sbuf32 = sbuf; - unsigned int x; - - for (x = 0; x < pixels; x++) { - u32 pix = le32_to_cpu(sbuf32[x]); - u8 r = (pix & 0x00ff0000) >> 16; - u8 g = (pix & 0x0000ff00) >> 8; - u8 b = pix & 0x000000ff; - - /* ITU BT.601: Y = 0.299 R + 0.587 G + 0.114 B */ - *dbuf8++ = (3 * r + 6 * g + b) / 10; - } + drm_fb_xfrm_line_32to8(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_r8_bt601); } /** @@ -1031,36 +1018,7 @@ EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray8); static void drm_fb_argb8888_to_argb4444_line(void *dbuf, const void *sbuf, unsigned int pixels) { - unsigned int pixels2 = pixels & ~GENMASK_ULL(0, 0); - __le32 *dbuf32 = dbuf; - __le16 *dbuf16 = dbuf + pixels2 * sizeof(*dbuf16); - const __le32 *sbuf32 = sbuf; - unsigned int x; - u32 val32; - u16 val16; - u32 pix[2]; - - for (x = 0; x < pixels2; x += 2, ++dbuf32) { - pix[0] = le32_to_cpu(sbuf32[x]); - pix[1] = le32_to_cpu(sbuf32[x + 1]); - val32 = ((pix[0] & 0xf0000000) >> 16) | - ((pix[0] & 0x00f00000) >> 12) | - ((pix[0] & 0x0000f000) >> 8) | - ((pix[0] & 0x000000f0) >> 4) | - ((pix[1] & 0xf0000000) >> 0) | - ((pix[1] & 0x00f00000) << 4) | - ((pix[1] & 0x0000f000) << 8) | - ((pix[1] & 0x000000f0) << 12); - *dbuf32 = cpu_to_le32(val32); - } - for (; x < pixels; x++) { - pix[0] = le32_to_cpu(sbuf32[x]); - val16 = ((pix[0] & 0xf0000000) >> 16) | - ((pix[0] & 0x00f00000) >> 12) | - ((pix[0] & 0x0000f000) >> 8) | - ((pix[0] & 0x000000f0) >> 4); - dbuf16[x] = cpu_to_le16(val16); - } + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_argb8888_to_argb4444); } /** diff --git a/drivers/gpu/drm/drm_format_internal.h b/drivers/gpu/drm/drm_format_internal.h new file mode 100644 index 000000000000..9f857bfa368d --- /dev/null +++ b/drivers/gpu/drm/drm_format_internal.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ + +#ifndef DRM_FORMAT_INTERNAL_H +#define DRM_FORMAT_INTERNAL_H + +#include +#include + +/* + * Each pixel-format conversion helper takes a raw pixel in a + * specific input format and returns a raw pixel in a specific + * output format. All pixels are in little-endian byte order. + * + * Function names are + * + * drm_pixel__to__() + * + * where and refer to pixel formats. The + * is optional and hints to the method used for the + * conversion. Helpers with no algorithm given apply pixel-bit + * shifting. + * + * The argument type is u32. We expect this to be wide enough to + * hold all conversion input from 32-bit RGB to any output format. + * The Linux kernel should avoid format conversion for anything + * but XRGB8888 input data. Converting from other format can still + * be acceptable in some cases. + * + * The return type is u32. It is wide enough to hold all conversion + * output from XRGB8888. For output formats wider than 32 bit, a + * return type of u64 would be acceptable. + */ + +/* + * Conversions from XRGB8888 + */ + +static inline u32 drm_pixel_xrgb8888_to_r8_bt601(u32 pix) +{ + u32 r = (pix & 0x00ff0000) >> 16; + u32 g = (pix & 0x0000ff00) >> 8; + u32 b = pix & 0x000000ff; + + /* ITU-R BT.601: Y = 0.299 R + 0.587 G + 0.114 B */ + return (3 * r + 6 * g + b) / 10; +} + +static inline u32 drm_pixel_xrgb8888_to_rgb332(u32 pix) +{ + return ((pix & 0x00e00000) >> 16) | + ((pix & 0x0000e000) >> 11) | + ((pix & 0x000000c0) >> 6); +} + +static inline u32 drm_pixel_xrgb8888_to_rgb565(u32 pix) +{ + return ((pix & 0x00f80000) >> 8) | + ((pix & 0x0000fc00) >> 5) | + ((pix & 0x000000f8) >> 3); +} + +static inline u32 drm_pixel_xrgb8888_to_rgbx5551(u32 pix) +{ + return ((pix & 0x00f80000) >> 8) | + ((pix & 0x0000f800) >> 5) | + ((pix & 0x000000f8) >> 2); +} + +static inline u32 drm_pixel_xrgb8888_to_rgba5551(u32 pix) +{ + return drm_pixel_xrgb8888_to_rgbx5551(pix) | + BIT(0); /* set alpha bit */ +} + +static inline u32 drm_pixel_xrgb8888_to_xrgb1555(u32 pix) +{ + return ((pix & 0x00f80000) >> 9) | + ((pix & 0x0000f800) >> 6) | + ((pix & 0x000000f8) >> 3); +} + +static inline u32 drm_pixel_xrgb8888_to_argb1555(u32 pix) +{ + return BIT(15) | /* set alpha bit */ + drm_pixel_xrgb8888_to_xrgb1555(pix); +} + +static inline u32 drm_pixel_xrgb8888_to_rgb888(u32 pix) +{ + return pix & GENMASK(23, 0); +} + +static inline u32 drm_pixel_xrgb8888_to_bgr888(u32 pix) +{ + return ((pix & 0x00ff0000) >> 16) | + ((pix & 0x0000ff00)) | + ((pix & 0x000000ff) << 16); +} + +static inline u32 drm_pixel_xrgb8888_to_argb8888(u32 pix) +{ + return GENMASK(31, 24) | /* fill alpha bits */ + pix; +} + +static inline u32 drm_pixel_xrgb8888_to_xbgr8888(u32 pix) +{ + return ((pix & 0xff000000)) | /* also copy filler bits */ + ((pix & 0x00ff0000) >> 16) | + ((pix & 0x0000ff00)) | + ((pix & 0x000000ff) << 16); +} + +static inline u32 drm_pixel_xrgb8888_to_abgr8888(u32 pix) +{ + return GENMASK(31, 24) | /* fill alpha bits */ + drm_pixel_xrgb8888_to_xbgr8888(pix); +} + +static inline u32 drm_pixel_xrgb8888_to_xrgb2101010(u32 pix) +{ + pix = ((pix & 0x000000ff) << 2) | + ((pix & 0x0000ff00) << 4) | + ((pix & 0x00ff0000) << 6); + return pix | ((pix >> 8) & 0x00300c03); +} + +static inline u32 drm_pixel_xrgb8888_to_argb2101010(u32 pix) +{ + return GENMASK(31, 30) | /* set alpha bits */ + drm_pixel_xrgb8888_to_xrgb2101010(pix); +} + +static inline u32 drm_pixel_xrgb8888_to_xbgr2101010(u32 pix) +{ + pix = ((pix & 0x00ff0000) >> 14) | + ((pix & 0x0000ff00) << 4) | + ((pix & 0x000000ff) << 22); + return pix | ((pix >> 8) & 0x00300c03); +} + +static inline u32 drm_pixel_xrgb8888_to_abgr2101010(u32 pix) +{ + return GENMASK(31, 30) | /* set alpha bits */ + drm_pixel_xrgb8888_to_xbgr2101010(pix); +} + +/* + * Conversion from ARGB8888 + */ + +static inline u32 drm_pixel_argb8888_to_argb4444(u32 pix) +{ + return ((pix & 0xf0000000) >> 16) | + ((pix & 0x00f00000) >> 12) | + ((pix & 0x0000f000) >> 8) | + ((pix & 0x000000f0) >> 4); +} + +#endif diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index c6240bab3fa5..1e659d2660f7 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1216,7 +1216,7 @@ void drm_gem_unpin(struct drm_gem_object *obj) dma_resv_unlock(obj->resv); } -int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) +int drm_gem_vmap_locked(struct drm_gem_object *obj, struct iosys_map *map) { int ret; @@ -1233,9 +1233,9 @@ int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) return 0; } -EXPORT_SYMBOL(drm_gem_vmap); +EXPORT_SYMBOL(drm_gem_vmap_locked); -void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +void drm_gem_vunmap_locked(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_assert_held(obj->resv); @@ -1248,7 +1248,7 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) /* Always set the mapping to NULL. Callers may rely on this. */ iosys_map_clear(map); } -EXPORT_SYMBOL(drm_gem_vunmap); +EXPORT_SYMBOL(drm_gem_vunmap_locked); void drm_gem_lock(struct drm_gem_object *obj) { @@ -1262,25 +1262,25 @@ void drm_gem_unlock(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_unlock); -int drm_gem_vmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) +int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) { int ret; dma_resv_lock(obj->resv, NULL); - ret = drm_gem_vmap(obj, map); + ret = drm_gem_vmap_locked(obj, map); dma_resv_unlock(obj->resv); return ret; } -EXPORT_SYMBOL(drm_gem_vmap_unlocked); +EXPORT_SYMBOL(drm_gem_vmap); -void drm_gem_vunmap_unlocked(struct drm_gem_object *obj, struct iosys_map *map) +void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { dma_resv_lock(obj->resv, NULL); - drm_gem_vunmap(obj, map); + drm_gem_vunmap_locked(obj, map); dma_resv_unlock(obj->resv); } -EXPORT_SYMBOL(drm_gem_vunmap_unlocked); +EXPORT_SYMBOL(drm_gem_vunmap); /** * drm_gem_lock_reservations - Sets up the ww context and acquires @@ -1543,10 +1543,10 @@ tail: EXPORT_SYMBOL(drm_gem_lru_scan); /** - * drm_gem_evict - helper to evict backing pages for a GEM object + * drm_gem_evict_locked - helper to evict backing pages for a GEM object * @obj: obj in question */ -int drm_gem_evict(struct drm_gem_object *obj) +int drm_gem_evict_locked(struct drm_gem_object *obj) { dma_resv_assert_held(obj->resv); @@ -1558,4 +1558,4 @@ int drm_gem_evict(struct drm_gem_object *obj) return 0; } -EXPORT_SYMBOL(drm_gem_evict); +EXPORT_SYMBOL(drm_gem_evict_locked); diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 0fbeb686e561..6f72e7a0f427 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -362,7 +362,7 @@ int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct iosys_map *map, ret = -EINVAL; goto err_drm_gem_vunmap; } - ret = drm_gem_vmap_unlocked(obj, &map[i]); + ret = drm_gem_vmap(obj, &map[i]); if (ret) goto err_drm_gem_vunmap; } @@ -384,7 +384,7 @@ err_drm_gem_vunmap: obj = drm_gem_fb_get_obj(fb, i); if (!obj) continue; - drm_gem_vunmap_unlocked(obj, &map[i]); + drm_gem_vunmap(obj, &map[i]); } return ret; } @@ -411,7 +411,7 @@ void drm_gem_fb_vunmap(struct drm_framebuffer *fb, struct iosys_map *map) continue; if (iosys_map_is_null(&map[i])) continue; - drm_gem_vunmap_unlocked(obj, &map[i]); + drm_gem_vunmap(obj, &map[i]); } } EXPORT_SYMBOL(drm_gem_fb_vunmap); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index d99dee67353a..aa43265f4f4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -165,7 +165,7 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) } else { dma_resv_lock(shmem->base.resv, NULL); - drm_WARN_ON(obj->dev, shmem->vmap_use_count); + drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count)); if (shmem->sgt) { dma_unmap_sgtable(obj->dev->dev, shmem->sgt, @@ -174,9 +174,10 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) kfree(shmem->sgt); } if (shmem->pages) - drm_gem_shmem_put_pages(shmem); + drm_gem_shmem_put_pages_locked(shmem); - drm_WARN_ON(obj->dev, shmem->pages_use_count); + drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count)); + drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_pin_count)); dma_resv_unlock(shmem->base.resv); } @@ -186,21 +187,20 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) } EXPORT_SYMBOL_GPL(drm_gem_shmem_free); -static int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem) +static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct page **pages; dma_resv_assert_held(shmem->base.resv); - if (shmem->pages_use_count++ > 0) + if (refcount_inc_not_zero(&shmem->pages_use_count)) return 0; pages = drm_gem_get_pages(obj); if (IS_ERR(pages)) { drm_dbg_kms(obj->dev, "Failed to get pages (%ld)\n", PTR_ERR(pages)); - shmem->pages_use_count = 0; return PTR_ERR(pages); } @@ -216,38 +216,36 @@ static int drm_gem_shmem_get_pages(struct drm_gem_shmem_object *shmem) shmem->pages = pages; + refcount_set(&shmem->pages_use_count, 1); + return 0; } /* - * drm_gem_shmem_put_pages - Decrease use count on the backing pages for a shmem GEM object + * drm_gem_shmem_put_pages_locked - Decrease use count on the backing pages for a shmem GEM object * @shmem: shmem GEM object * * This function decreases the use count and puts the backing pages when use drops to zero. */ -void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem) +void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; dma_resv_assert_held(shmem->base.resv); - if (drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) - return; - - if (--shmem->pages_use_count > 0) - return; - + if (refcount_dec_and_test(&shmem->pages_use_count)) { #ifdef CONFIG_X86 - if (shmem->map_wc) - set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); + if (shmem->map_wc) + set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); #endif - drm_gem_put_pages(obj, shmem->pages, - shmem->pages_mark_dirty_on_put, - shmem->pages_mark_accessed_on_put); - shmem->pages = NULL; + drm_gem_put_pages(obj, shmem->pages, + shmem->pages_mark_dirty_on_put, + shmem->pages_mark_accessed_on_put); + shmem->pages = NULL; + } } -EXPORT_SYMBOL(drm_gem_shmem_put_pages); +EXPORT_SYMBOL_GPL(drm_gem_shmem_put_pages_locked); int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) { @@ -257,7 +255,12 @@ int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) drm_WARN_ON(shmem->base.dev, drm_gem_is_imported(&shmem->base)); - ret = drm_gem_shmem_get_pages(shmem); + if (refcount_inc_not_zero(&shmem->pages_pin_count)) + return 0; + + ret = drm_gem_shmem_get_pages_locked(shmem); + if (!ret) + refcount_set(&shmem->pages_pin_count, 1); return ret; } @@ -267,7 +270,8 @@ void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem) { dma_resv_assert_held(shmem->base.resv); - drm_gem_shmem_put_pages(shmem); + if (refcount_dec_and_test(&shmem->pages_pin_count)) + drm_gem_shmem_put_pages_locked(shmem); } EXPORT_SYMBOL(drm_gem_shmem_unpin_locked); @@ -288,6 +292,9 @@ int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem) drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); + if (refcount_inc_not_zero(&shmem->pages_pin_count)) + return 0; + ret = dma_resv_lock_interruptible(shmem->base.resv, NULL); if (ret) return ret; @@ -296,7 +303,7 @@ int drm_gem_shmem_pin(struct drm_gem_shmem_object *shmem) return ret; } -EXPORT_SYMBOL(drm_gem_shmem_pin); +EXPORT_SYMBOL_GPL(drm_gem_shmem_pin); /** * drm_gem_shmem_unpin - Unpin backing pages for a shmem GEM object @@ -311,14 +318,17 @@ void drm_gem_shmem_unpin(struct drm_gem_shmem_object *shmem) drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); + if (refcount_dec_not_one(&shmem->pages_pin_count)) + return; + dma_resv_lock(shmem->base.resv, NULL); drm_gem_shmem_unpin_locked(shmem); dma_resv_unlock(shmem->base.resv); } -EXPORT_SYMBOL(drm_gem_shmem_unpin); +EXPORT_SYMBOL_GPL(drm_gem_shmem_unpin); /* - * drm_gem_shmem_vmap - Create a virtual mapping for a shmem GEM object + * drm_gem_shmem_vmap_locked - Create a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Returns the kernel virtual address of the SHMEM GEM object's backing * store. @@ -327,47 +337,43 @@ EXPORT_SYMBOL(drm_gem_shmem_unpin); * exists for the buffer backing the shmem GEM object. It hides the differences * between dma-buf imported and natively allocated objects. * - * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap(). + * Acquired mappings should be cleaned up by calling drm_gem_shmem_vunmap_locked(). * * Returns: * 0 on success or a negative error code on failure. */ -int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, - struct iosys_map *map) +int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, + struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; int ret = 0; if (drm_gem_is_imported(obj)) { ret = dma_buf_vmap(obj->dma_buf, map); - if (!ret) { - if (drm_WARN_ON(obj->dev, map->is_iomem)) { - dma_buf_vunmap(obj->dma_buf, map); - return -EIO; - } - } } else { pgprot_t prot = PAGE_KERNEL; dma_resv_assert_held(shmem->base.resv); - if (shmem->vmap_use_count++ > 0) { + if (refcount_inc_not_zero(&shmem->vmap_use_count)) { iosys_map_set_vaddr(map, shmem->vaddr); return 0; } - ret = drm_gem_shmem_get_pages(shmem); + ret = drm_gem_shmem_pin_locked(shmem); if (ret) - goto err_zero_use; + return ret; if (shmem->map_wc) prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, VM_MAP, prot); - if (!shmem->vaddr) + if (!shmem->vaddr) { ret = -ENOMEM; - else + } else { iosys_map_set_vaddr(map, shmem->vaddr); + refcount_set(&shmem->vmap_use_count, 1); + } } if (ret) { @@ -379,28 +385,26 @@ int drm_gem_shmem_vmap(struct drm_gem_shmem_object *shmem, err_put_pages: if (!drm_gem_is_imported(obj)) - drm_gem_shmem_put_pages(shmem); -err_zero_use: - shmem->vmap_use_count = 0; + drm_gem_shmem_unpin_locked(shmem); return ret; } -EXPORT_SYMBOL(drm_gem_shmem_vmap); +EXPORT_SYMBOL_GPL(drm_gem_shmem_vmap_locked); /* - * drm_gem_shmem_vunmap - Unmap a virtual mapping for a shmem GEM object + * drm_gem_shmem_vunmap_locked - Unmap a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Kernel virtual address where the SHMEM GEM object was mapped * * This function cleans up a kernel virtual address mapping acquired by - * drm_gem_shmem_vmap(). The mapping is only removed when the use count drops to - * zero. + * drm_gem_shmem_vmap_locked(). The mapping is only removed when the use count + * drops to zero. * * This function hides the differences between dma-buf imported and natively * allocated objects. */ -void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, - struct iosys_map *map) +void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, + struct iosys_map *map) { struct drm_gem_object *obj = &shmem->base; @@ -409,19 +413,15 @@ void drm_gem_shmem_vunmap(struct drm_gem_shmem_object *shmem, } else { dma_resv_assert_held(shmem->base.resv); - if (drm_WARN_ON_ONCE(obj->dev, !shmem->vmap_use_count)) - return; + if (refcount_dec_and_test(&shmem->vmap_use_count)) { + vunmap(shmem->vaddr); + shmem->vaddr = NULL; - if (--shmem->vmap_use_count > 0) - return; - - vunmap(shmem->vaddr); - drm_gem_shmem_put_pages(shmem); + drm_gem_shmem_unpin_locked(shmem); + } } - - shmem->vaddr = NULL; } -EXPORT_SYMBOL(drm_gem_shmem_vunmap); +EXPORT_SYMBOL_GPL(drm_gem_shmem_vunmap_locked); static int drm_gem_shmem_create_with_handle(struct drm_file *file_priv, @@ -449,7 +449,7 @@ drm_gem_shmem_create_with_handle(struct drm_file *file_priv, /* Update madvise status, returns true if not purged, else * false or -errno. */ -int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) +int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv) { dma_resv_assert_held(shmem->base.resv); @@ -460,9 +460,9 @@ int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) return (madv >= 0); } -EXPORT_SYMBOL(drm_gem_shmem_madvise); +EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise_locked); -void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem) +void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct drm_device *dev = obj->dev; @@ -476,7 +476,7 @@ void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem) kfree(shmem->sgt); shmem->sgt = NULL; - drm_gem_shmem_put_pages(shmem); + drm_gem_shmem_put_pages_locked(shmem); shmem->madv = -1; @@ -492,7 +492,7 @@ void drm_gem_shmem_purge(struct drm_gem_shmem_object *shmem) invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); } -EXPORT_SYMBOL(drm_gem_shmem_purge); +EXPORT_SYMBOL_GPL(drm_gem_shmem_purge_locked); /** * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object @@ -575,8 +575,8 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) * mmap'd, vm_open() just grabs an additional reference for the new * mm the vma is getting copied into (ie. on fork()). */ - if (!drm_WARN_ON_ONCE(obj->dev, !shmem->pages_use_count)) - shmem->pages_use_count++; + drm_WARN_ON_ONCE(obj->dev, + !refcount_inc_not_zero(&shmem->pages_use_count)); dma_resv_unlock(shmem->base.resv); @@ -589,7 +589,7 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); dma_resv_lock(shmem->base.resv, NULL); - drm_gem_shmem_put_pages(shmem); + drm_gem_shmem_put_pages_locked(shmem); dma_resv_unlock(shmem->base.resv); drm_gem_vm_close(vma); @@ -639,7 +639,7 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct return -EINVAL; dma_resv_lock(shmem->base.resv, NULL); - ret = drm_gem_shmem_get_pages(shmem); + ret = drm_gem_shmem_get_pages_locked(shmem); dma_resv_unlock(shmem->base.resv); if (ret) @@ -666,11 +666,12 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, if (drm_gem_is_imported(&shmem->base)) return; - drm_printf_indent(p, indent, "pages_use_count=%u\n", shmem->pages_use_count); - drm_printf_indent(p, indent, "vmap_use_count=%u\n", shmem->vmap_use_count); + drm_printf_indent(p, indent, "pages_pin_count=%u\n", refcount_read(&shmem->pages_pin_count)); + drm_printf_indent(p, indent, "pages_use_count=%u\n", refcount_read(&shmem->pages_use_count)); + drm_printf_indent(p, indent, "vmap_use_count=%u\n", refcount_read(&shmem->vmap_use_count)); drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); } -EXPORT_SYMBOL(drm_gem_shmem_print_info); +EXPORT_SYMBOL_GPL(drm_gem_shmem_print_info); /** * drm_gem_shmem_get_sg_table - Provide a scatter/gather table of pinned @@ -707,7 +708,7 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ drm_WARN_ON(obj->dev, drm_gem_is_imported(obj)); - ret = drm_gem_shmem_get_pages(shmem); + ret = drm_gem_shmem_get_pages_locked(shmem); if (ret) return ERR_PTR(ret); @@ -729,7 +730,7 @@ err_free_sgt: sg_free_table(sgt); kfree(sgt); err_put_pages: - drm_gem_shmem_put_pages(shmem); + drm_gem_shmem_put_pages_locked(shmem); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 4b2f32889f00..735bfdf4322f 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1338,7 +1338,6 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, unsigned long num_dma_mapped; unsigned int order = 0; unsigned long *pfns; - struct page **pages; int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; @@ -1378,7 +1377,6 @@ retry: if (err) goto err_free; - pages = (struct page **)pfns; map_pages: /* * Perform all dma mappings under the notifier lock to not @@ -1454,8 +1452,6 @@ map_pages: err = -EFAULT; goto err_unmap; } - - pages[i] = page; } else { dma_addr_t addr; diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index b2b6a8e49dda..e44f28fd81d3 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -179,8 +179,8 @@ int drm_gem_pin_locked(struct drm_gem_object *obj); void drm_gem_unpin_locked(struct drm_gem_object *obj); int drm_gem_pin(struct drm_gem_object *obj); void drm_gem_unpin(struct drm_gem_object *obj); -int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map); -void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map); +int drm_gem_vmap_locked(struct drm_gem_object *obj, struct iosys_map *map); +void drm_gem_vunmap_locked(struct drm_gem_object *obj, struct iosys_map *map); /* drm_debugfs.c drm_debugfs_crc.c */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index dfa595556320..e5184a0c2465 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -36,6 +36,8 @@ #include #include +#include + #include