diff --git a/.mailmap b/.mailmap index a6c619e22efce..81ac1e17ac3cb 100644 --- a/.mailmap +++ b/.mailmap @@ -384,6 +384,7 @@ Li Yang Li Yang Lior David Lorenzo Pieralisi +Lorenzo Stoakes Luca Ceresoli Lukasz Luba Luo Jie diff --git a/CREDITS b/CREDITS index 0107047f807bf..f87c0fa62cfc8 100644 --- a/CREDITS +++ b/CREDITS @@ -1214,6 +1214,10 @@ D: UDF filesystem S: (ask for current address) S: USA +N: Larry Finger +E: Larry.Finger@lwfinger.net +D: Maintainer of wireless drivers, too many to list here + N: Jürgen Fischer E: fischer@norbit.de D: Author of Adaptec AHA-152x SCSI driver @@ -3146,9 +3150,11 @@ S: Triftstra=DFe 55 S: 13353 Berlin S: Germany -N: Gustavo Pimental +N: Gustavo Pimentel E: gustavo.pimentel@synopsys.com D: PCI driver for Synopsys DesignWare +D: Synopsys DesignWare eDMA driver +D: Synopsys DesignWare xData traffic generator N: Emanuel Pirker E: epirker@edu.uni-klu.ac.at diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index aa8290a29dc88..fd4b56c0996f4 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -723,40 +723,26 @@ Configuration pseudo-files: ======================= ======================================================= SecurityFlags Flags which control security negotiation and also packet signing. Authentication (may/must) - flags (e.g. for NTLM and/or NTLMv2) may be combined with + flags (e.g. for NTLMv2) may be combined with the signing flags. Specifying two different password hashing mechanisms (as "must use") on the other hand does not make much sense. Default flags are:: - 0x07007 - - (NTLM, NTLMv2 and packet signing allowed). The maximum - allowable flags if you want to allow mounts to servers - using weaker password hashes is 0x37037 (lanman, - plaintext, ntlm, ntlmv2, signing allowed). Some - SecurityFlags require the corresponding menuconfig - options to be enabled. Enabling plaintext - authentication currently requires also enabling - lanman authentication in the security flags - because the cifs module only supports sending - laintext passwords using the older lanman dialect - form of the session setup SMB. (e.g. for authentication - using plain text passwords, set the SecurityFlags - to 0x30030):: + 0x00C5 + + (NTLMv2 and packet signing allowed). Some SecurityFlags + may require enabling a corresponding menuconfig option. may use packet signing 0x00001 must use packet signing 0x01001 - may use NTLM (most common password hash) 0x00002 - must use NTLM 0x02002 may use NTLMv2 0x00004 must use NTLMv2 0x04004 - may use Kerberos security 0x00008 - must use Kerberos 0x08008 - may use lanman (weak) password hash 0x00010 - must use lanman password hash 0x10010 - may use plaintext passwords 0x00020 - must use plaintext passwords 0x20020 - (reserved for future packet encryption) 0x00040 + may use Kerberos security (krb5) 0x00008 + must use Kerberos 0x08008 + may use NTLMSSP 0x00080 + must use NTLMSSP 0x80080 + seal (packet encryption) 0x00040 + must seal (not implemented yet) 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. 
This field diff --git a/Documentation/arch/riscv/cmodx.rst b/Documentation/arch/riscv/cmodx.rst index 1c0ca06b6c974..8c48bcff3df9d 100644 --- a/Documentation/arch/riscv/cmodx.rst +++ b/Documentation/arch/riscv/cmodx.rst @@ -62,10 +62,10 @@ cmodx.c:: printf("Value before cmodx: %d\n", value); // Call prctl before first fence.i is called inside modify_instruction - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS); modify_instruction(); // Call prctl after final fence.i is called in process - prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS); + prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS); value = get_value(); printf("Value after cmodx: %d\n", value); diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml index 07ccbda4a0ab5..b9a9f2cf32a1b 100644 --- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml +++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml @@ -66,7 +66,6 @@ allOf: compatible: contains: enum: - - qcom,qdu1000-llcc - qcom,sc7180-llcc - qcom,sm6350-llcc then: @@ -104,6 +103,7 @@ allOf: compatible: contains: enum: + - qcom,qdu1000-llcc - qcom,sc8180x-llcc - qcom,sc8280xp-llcc - qcom,x1e80100-llcc diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 5149ecdc53c79..d732c42526dfd 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -328,6 +328,12 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/mem.c :doc: cxl mem +.. kernel-doc:: drivers/cxl/cxlmem.h + :internal: + +.. kernel-doc:: drivers/cxl/core/memdev.c + :identifiers: + CXL Port -------- .. kernel-doc:: drivers/cxl/port.c @@ -341,6 +347,15 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: +.. kernel-doc:: drivers/cxl/core/hdm.c + :doc: cxl core hdm + +.. kernel-doc:: drivers/cxl/core/hdm.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/cdat.c + :identifiers: + .. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core diff --git a/Documentation/gpu/amdgpu/driver-core.rst b/Documentation/gpu/amdgpu/driver-core.rst index 467e6843aef63..32723a925377e 100644 --- a/Documentation/gpu/amdgpu/driver-core.rst +++ b/Documentation/gpu/amdgpu/driver-core.rst @@ -179,4 +179,4 @@ IP Blocks :doc: IP Blocks .. kernel-doc:: drivers/gpu/drm/amd/include/amd_shared.h - :identifiers: amd_ip_block_type amd_ip_funcs + :identifiers: amd_ip_block_type amd_ip_funcs DC_DEBUG_MASK diff --git a/Documentation/gpu/amdgpu/index.rst b/Documentation/gpu/amdgpu/index.rst index 847e04924030c..302d039928ee8 100644 --- a/Documentation/gpu/amdgpu/index.rst +++ b/Documentation/gpu/amdgpu/index.rst @@ -16,4 +16,5 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures. thermal driver-misc debugging + process-isolation amdgpu-glossary diff --git a/Documentation/gpu/amdgpu/process-isolation.rst b/Documentation/gpu/amdgpu/process-isolation.rst new file mode 100644 index 0000000000000..6b6d70e357a75 --- /dev/null +++ b/Documentation/gpu/amdgpu/process-isolation.rst @@ -0,0 +1,59 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= + AMDGPU Process Isolation +========================= + +The AMDGPU driver includes a feature that enables automatic process isolation on the graphics engine. 
This feature serializes access to the graphics engine and adds a cleaner shader which clears the Local Data Store (LDS) and General Purpose Registers (GPRs) between jobs. All processes using the GPU, including both graphics and compute workloads, are serialized when this feature is enabled. On GPUs that support partitionable graphics engines, this feature can be enabled on a per-partition basis. + +In addition, there is an interface to manually run the cleaner shader when the use of the GPU is complete. This may be preferable in some use cases, such as a single-user system where the login manager triggers the cleaner shader when the user logs out. + +Process Isolation +================= + +The `run_cleaner_shader` and `enforce_isolation` sysfs interfaces allow users to manually execute the cleaner shader and control the process isolation feature, respectively. + +Partition Handling +------------------ + +The `enforce_isolation` file in sysfs can be used to enable process isolation and automatic shader cleanup between processes. On GPUs that support graphics engine partitioning, this can be enabled per partition. The partition and its current setting (0 disabled, 1 enabled) can be read from sysfs. On GPUs that do not support graphics engine partitioning, only a single partition will be present. Writing 1 to the partition position enables enforce isolation, writing 0 disables it. + +Example of enabling enforce isolation on a GPU with multiple partitions: + +.. code-block:: console + + $ echo 1 0 1 0 > /sys/class/drm/card0/device/enforce_isolation + $ cat /sys/class/drm/card0/device/enforce_isolation + 1 0 1 0 + +The output indicates that enforce isolation is enabled on the zeroth and second partitions and disabled on the first and third partitions. + +For devices with a single partition or those that do not support partitions, there will be only one element: + +.. code-block:: console + + $ echo 1 > /sys/class/drm/card0/device/enforce_isolation + $ cat /sys/class/drm/card0/device/enforce_isolation + 1 + +Cleaner Shader Execution +======================== + +The driver can trigger a cleaner shader to clean up the LDS and GPR state on the graphics engine. When process isolation is enabled, this happens automatically between processes. In addition, there is a sysfs file to manually trigger cleaner shader execution. + +To manually trigger the execution of the cleaner shader, write `0` to the `run_cleaner_shader` sysfs file: + +.. code-block:: console + + $ echo 0 > /sys/class/drm/card0/device/run_cleaner_shader + +For multi-partition devices, you can specify the partition index when triggering the cleaner shader: + +.. code-block:: console + + $ echo 0 > /sys/class/drm/card0/device/run_cleaner_shader # For partition 0 + $ echo 1 > /sys/class/drm/card0/device/run_cleaner_shader # For partition 1 + $ echo 2 > /sys/class/drm/card0/device/run_cleaner_shader # For partition 2 + # ... and so on for each partition + +This command initiates the cleaner shader, which will run and complete before any new tasks are scheduled on the GPU.
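As a usage note on the login-manager scenario mentioned in the new document, the following is a minimal logout-hook sketch, not part of the patch itself; it assumes a single GPU at /sys/class/drm/card0, root privileges, and the sysfs interfaces documented above, and it derives the partition count from the number of fields reported by `enforce_isolation`:

.. code-block:: console

    # Hypothetical logout hook: run the cleaner shader on every partition.
    # The field count of enforce_isolation ("1 0 1 0" -> 4) gives the number
    # of partitions; single-partition devices report a single field.
    $ nparts=$(wc -w < /sys/class/drm/card0/device/enforce_isolation)
    $ for i in $(seq 0 $((nparts - 1))); do
    >     echo "$i" > /sys/class/drm/card0/device/run_cleaner_shader
    > done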
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index 9232cd7da301b..5d0b68f752c0d 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -49,7 +49,7 @@ example usage $ devlink region show [ DEV/REGION ] $ devlink region del DEV/REGION snapshot SNAPSHOT_ID $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ] - $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length length + $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ] address ADDRESS length LENGTH # Show all of the exposed regions with region sizes: $ devlink region show diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index a141e8e65c5d3..9a97030c6c8df 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -186,6 +186,7 @@ Code Seq# Include File Comments 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h conflict! 'R' 01 linux/rfkill.h conflict! +'R' 20-2F linux/trace_mmap.h 'R' C0-DF net/bluetooth/rfcomm.h 'R' E0 uapi/linux/fsl_mc.h 'S' all linux/cdrom.h conflict! diff --git a/MAINTAINERS b/MAINTAINERS index ed2d2dbcec81f..958d0084af63e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -846,12 +846,6 @@ ALPS PS/2 TOUCHPAD DRIVER R: Pali Rohár F: drivers/input/mouse/alps.* -ALTERA I2C CONTROLLER DRIVER -M: Thor Thayer -S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-altera.txt -F: drivers/i2c/busses/i2c-altera.c - ALTERA MAILBOX DRIVER M: Mun Yew Tham S: Maintained @@ -871,21 +865,6 @@ L: linux-gpio@vger.kernel.org S: Maintained F: drivers/gpio/gpio-altera.c -ALTERA SYSTEM MANAGER DRIVER -M: Thor Thayer -S: Maintained -F: drivers/mfd/altera-sysmgr.c -F: include/linux/mfd/altera-sysmgr.h - -ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT -M: Thor Thayer -S: Maintained -F: drivers/gpio/gpio-altera-a10sr.c -F: drivers/mfd/altera-a10sr.c -F: drivers/reset/reset-a10sr.c -F: include/dt-bindings/reset/altr,rst-mgr-a10sr.h -F: include/linux/mfd/altera-a10sr.h - ALTERA TRIPLE SPEED ETHERNET DRIVER M: Joyce Ooi L: netdev@vger.kernel.org @@ -2892,7 +2871,7 @@ F: drivers/edac/altera_edac.[ch] ARM/SPREADTRUM SoC SUPPORT M: Orson Zhai M: Baolin Wang -M: Chunyan Zhang +R: Chunyan Zhang S: Maintained F: arch/arm64/boot/dts/sprd N: sprd @@ -3601,10 +3580,9 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43/ B43LEGACY WIRELESS DRIVER -M: Larry Finger L: linux-wireless@vger.kernel.org L: b43-dev@lists.infradead.org -S: Maintained +S: Orphan W: https://wireless.wiki.kernel.org/en/users/Drivers/b43 F: drivers/net/wireless/broadcom/b43legacy/ @@ -6239,9 +6217,8 @@ S: Maintained F: drivers/usb/dwc3/ DESIGNWARE XDATA IP DRIVER -M: Gustavo Pimentel L: linux-pci@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/misc-devices/dw-xdata-pcie.rst F: drivers/misc/dw-xdata-pcie.c @@ -8841,6 +8818,7 @@ F: drivers/spi/spi-fsl-qspi.c FREESCALE QUICC ENGINE LIBRARY M: Qiang Zhao +M: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/soc/fsl/qe/ @@ -8890,9 +8868,10 @@ S: Maintained F: drivers/tty/serial/ucc_uart.c FREESCALE SOC DRIVERS +M: Christophe Leroy L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Orphan +S: Maintained F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: 
Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ @@ -14482,7 +14461,7 @@ MEMORY MAPPING M: Andrew Morton R: Liam R. Howlett R: Vlastimil Babka -R: Lorenzo Stoakes +R: Lorenzo Stoakes L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -16455,7 +16434,7 @@ F: arch/arm/boot/dts/ti/omap/am335x-nano.dts OMAP1 SUPPORT M: Aaro Koskinen M: Janusz Krzysztofik -M: Tony Lindgren +R: Tony Lindgren L: linux-omap@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-omap/list/ @@ -16467,10 +16446,13 @@ F: include/linux/platform_data/ams-delta-fiq.h F: include/linux/platform_data/i2c-omap.h OMAP2+ SUPPORT +M: Aaro Koskinen +M: Andreas Kemnade +M: Kevin Hilman +M: Roger Quadros M: Tony Lindgren L: linux-omap@vger.kernel.org S: Maintained -W: http://www.muru.com/linux/omap/ W: http://linux.omap.com/ Q: http://patchwork.kernel.org/project/linux-omap/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git @@ -18382,7 +18364,7 @@ M: Jeff Johnson L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath12k/ N: ath12k @@ -18392,7 +18374,7 @@ M: Jeff Johnson L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath10k/ N: ath10k @@ -18403,7 +18385,7 @@ L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: drivers/net/wireless/ath/ath11k/ N: ath11k @@ -18412,7 +18394,7 @@ M: Toke Høiland-Jørgensen L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml F: drivers/net/wireless/ath/ath9k/ @@ -19323,7 +19305,7 @@ F: drivers/perf/riscv_pmu_legacy.c F: drivers/perf/riscv_pmu_sbi.c RISC-V THEAD SoC SUPPORT -M: Jisheng Zhang +M: Drew Fustini M: Guo Ren M: Fu Wei L: linux-riscv@lists.infradead.org @@ -19517,7 +19499,6 @@ F: drivers/net/wireless/realtek/rtl818x/rtl8180/ RTL8187 WIRELESS DRIVER M: Hin-Tak Leung -M: Larry Finger L: linux-wireless@vger.kernel.org S: Maintained T: git https://github.com/pkshih/rtw.git @@ -21255,7 +21236,6 @@ W: http://wiki.laptop.org/go/DCON F: drivers/staging/olpc_dcon/ STAGING - REALTEK RTL8712U DRIVERS -M: Larry Finger M: Florian Schilhabel . S: Odd Fixes F: drivers/staging/rtl8712/ @@ -23871,8 +23851,8 @@ S: Maintained F: drivers/vhost/scsi.c VIRTIO I2C DRIVER -M: Conghui Chen M: Viresh Kumar +R: "Chen, Jian Jun" L: linux-i2c@vger.kernel.org L: virtualization@lists.linux.dev S: Maintained diff --git a/Makefile b/Makefile index 06aa6402b3850..9d3080e06d7e1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1256,7 +1256,12 @@ define filechk_version.h ((c) > 255 ? 
255 : (c)))'; \ echo \#define LINUX_VERSION_MAJOR $(VERSION); \ echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL); \ - echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL) + echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL); \ + echo '#define DRM_VER $(VERSION)'; \ + echo '#define DRM_PATCH $(PATCHLEVEL)'; \ + echo '#define DRM_SUB $(SUBLEVEL)'; \ + echo \#define DRM_VERSION_CODE LINUX_VERSION_CODE; \ + echo '#define DRM_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' endef $(version_h): private PATCHLEVEL := $(or $(PATCHLEVEL), 0) diff --git a/arch/arm/boot/dts/rockchip/rk3066a.dtsi b/arch/arm/boot/dts/rockchip/rk3066a.dtsi index 30139f21de64d..15cbd94d7ec05 100644 --- a/arch/arm/boot/dts/rockchip/rk3066a.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3066a.dtsi @@ -128,6 +128,7 @@ pinctrl-0 = <&hdmii2c_xfer>, <&hdmi_hpd>; power-domains = <&power RK3066_PD_VIO>; rockchip,grf = <&grf>; + #sound-dai-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c index 8aa39db095d76..2c5155bd376ba 100644 --- a/arch/arm/mach-davinci/pm.c +++ b/arch/arm/mach-davinci/pm.c @@ -61,7 +61,7 @@ static void davinci_pm_suspend(void) /* Configure sleep count in deep sleep register */ val = __raw_readl(pm_config.deepsleep_reg); - val &= ~DEEPSLEEP_SLEEPCOUNT_MASK, + val &= ~DEEPSLEEP_SLEEPCOUNT_MASK; val |= pm_config.sleepcount; __raw_writel(val, pm_config.deepsleep_reg); diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts index c204dd43c7269..ce90327e1b2e8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h64-remix-mini-pc.dts @@ -191,7 +191,7 @@ compatible = "x-powers,axp803"; reg = <0x3a3>; interrupt-parent = <&r_intc>; - interrupts = ; + interrupts = ; x-powers,drive-vbus-en; vin1-supply = <®_vcc5v>; diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi index f2a5e2e40461f..f90f03fa6a24f 100644 --- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi +++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi @@ -1459,9 +1459,23 @@ system-cache-controller@19200000 { compatible = "qcom,qdu1000-llcc"; - reg = <0 0x19200000 0 0xd80000>, + reg = <0 0x19200000 0 0x80000>, + <0 0x19300000 0 0x80000>, + <0 0x19600000 0 0x80000>, + <0 0x19700000 0 0x80000>, + <0 0x19a00000 0 0x80000>, + <0 0x19b00000 0 0x80000>, + <0 0x19e00000 0 0x80000>, + <0 0x19f00000 0 0x80000>, <0 0x1a200000 0 0x80000>; reg-names = "llcc0_base", + "llcc1_base", + "llcc2_base", + "llcc3_base", + "llcc4_base", + "llcc5_base", + "llcc6_base", + "llcc7_base", "llcc_broadcast_base"; interrupts = ; }; diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 31de735948390..1b3dc0ece54de 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -3605,7 +3605,7 @@ interrupts = , , , - ; + ; }; pcie0: pcie@1c00000 { diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index 0677123105602..581a70c34fd29 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -2647,11 +2647,14 @@ system-cache-controller@9200000 { compatible = "qcom,sc8180x-llcc"; - reg = <0 0x09200000 0 0x50000>, <0 0x09280000 0 0x50000>, - <0 0x09300000 0 0x50000>, <0 0x09380000 0 0x50000>, - <0 0x09600000 0 0x50000>; + reg = <0 0x09200000 0 0x58000>, <0 0x09280000 0 0x58000>, + <0 0x09300000 0 0x58000>, <0 0x09380000 0 0x58000>, + 
<0 0x09400000 0 0x58000>, <0 0x09480000 0 0x58000>, + <0 0x09500000 0 0x58000>, <0 0x09580000 0 0x58000>, + <0 0x09600000 0 0x58000>; reg-names = "llcc0_base", "llcc1_base", "llcc2_base", - "llcc3_base", "llcc_broadcast_base"; + "llcc3_base", "llcc4_base", "llcc5_base", + "llcc6_base", "llcc7_base", "llcc_broadcast_base"; interrupts = ; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index 41215567b3aed..372b35fb844f5 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -977,8 +977,7 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index e937732abeded..4bf99b6b6e5fb 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -655,15 +655,16 @@ status = "okay"; - /* FIXME: verify */ touchscreen@10 { - compatible = "hid-over-i2c"; + compatible = "elan,ekth5015m", "elan,ekth6915"; reg = <0x10>; - hid-descr-addr = <0x1>; interrupts-extended = <&tlmm 175 IRQ_TYPE_LEVEL_LOW>; - vdd-supply = <&vreg_misc_3p3>; - vddl-supply = <&vreg_s10b>; + reset-gpios = <&tlmm 99 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; + no-reset-on-power-off; + + vcc33-supply = <&vreg_misc_3p3>; + vccio-supply = <&vreg_misc_3p3>; pinctrl-names = "default"; pinctrl-0 = <&ts0_default>; @@ -1496,8 +1497,8 @@ reset-n-pins { pins = "gpio99"; function = "gpio"; - output-high; - drive-strength = <16>; + drive-strength = <2>; + bias-disable; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 0549ba1fbeea8..59f0a850671a3 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -4623,6 +4623,8 @@ restart@c264000 { compatible = "qcom,pshold"; reg = <0 0x0c264000 0 0x4>; + /* TZ seems to block access */ + status = "reserved"; }; tsens1: thermal-sensor@c265000 { diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index aca0a87092e45..9ed062150aaf2 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1090,6 +1090,7 @@ power-domains = <&rpmpd SM6115_VDDCX>; operating-points-v2 = <&sdhc1_opp_table>; + iommus = <&apps_smmu 0x00c0 0x0>; interconnects = <&system_noc MASTER_SDCC_1 RPM_ALWAYS_TAG &bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>, <&bimc MASTER_AMPSS_M0 RPM_ALWAYS_TAG diff --git a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts index c5c2895b37c7f..be6b1e7d07ce3 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-crd.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-crd.dts @@ -49,6 +49,15 @@ stdout-path = "serial0:115200n8"; }; + reserved-memory { + linux,cma { + compatible = "shared-dma-pool"; + size = <0x0 0x8000000>; + reusable; + linux,cma-default; + }; + }; + sound { compatible = "qcom,x1e80100-sndcard"; model = "X1E80100-CRD"; @@ -93,7 +102,7 @@ }; codec { - sound-dai = <&wcd938x 1>, <&swr2 0>, <&lpass_txmacro 0>; + sound-dai = <&wcd938x 1>, <&swr2 1>, <&lpass_txmacro 0>; }; platform { @@ -744,7 +753,7 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - qcom,tx-port-mapping = <1 1 2 3>; + qcom,tx-port-mapping = <2 2 3 4>; }; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index 
2061fbe7b75a9..8f67c393b871b 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -23,6 +23,15 @@ stdout-path = "serial0:115200n8"; }; + reserved-memory { + linux,cma { + compatible = "shared-dma-pool"; + size = <0x0 0x8000000>; + reusable; + linux,cma-default; + }; + }; + vph_pwr: vph-pwr-regulator { compatible = "regulator-fixed"; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 5f90a0b3c0166..05e4d491ec18c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -2737,15 +2737,17 @@ device_type = "pci"; compatible = "qcom,pcie-x1e80100"; reg = <0 0x01bf8000 0 0x3000>, - <0 0x70000000 0 0xf1d>, - <0 0x70000f20 0 0xa8>, + <0 0x70000000 0 0xf20>, + <0 0x70000f40 0 0xa8>, <0 0x70001000 0 0x1000>, - <0 0x70100000 0 0x100000>; + <0 0x70100000 0 0x100000>, + <0 0x01bfb000 0 0x1000>; reg-names = "parf", "dbi", "elbi", "atu", - "config"; + "config", + "mhi"; #address-cells = <3>; #size-cells = <2>; ranges = <0x01000000 0 0x00000000 0 0x70200000 0 0x100000>, diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts index b47fe02c33fbd..079101cddd65f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts @@ -5,6 +5,8 @@ */ /dts-v1/; + +#include #include "rk3308.dtsi" / { @@ -24,17 +26,21 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 = <&green_led_gio>, <&heartbeat_led_gpio>; + pinctrl-0 = <&green_led>, <&heartbeat_led>; green-led { + color = ; default-state = "on"; + function = LED_FUNCTION_POWER; gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; label = "rockpis:green:power"; linux,default-trigger = "default-on"; }; blue-led { + color = ; default-state = "on"; + function = LED_FUNCTION_HEARTBEAT; gpios = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; label = "rockpis:blue:user"; linux,default-trigger = "heartbeat"; @@ -126,10 +132,12 @@ }; &emmc { - bus-width = <4>; cap-mmc-highspeed; - mmc-hs200-1_8v; + cap-sd-highspeed; + no-sdio; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; vmmc-supply = <&vcc_io>; status = "okay"; }; @@ -214,11 +222,11 @@ pinctrl-0 = <&rtc_32k>; leds { - green_led_gio: green-led-gpio { + green_led: green-led { rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>; }; - heartbeat_led_gpio: heartbeat-led-gpio { + heartbeat_led: heartbeat-led { rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index 962ea893999bd..c00da150a22fa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -811,7 +811,7 @@ clocks = <&cru SCLK_I2S2_8CH_TX_OUT>, <&cru SCLK_I2S2_8CH_RX_OUT>, <&cru PCLK_ACODEC>; - reset-names = "codec-reset"; + reset-names = "codec"; resets = <&cru SRST_ACODEC_P>; #sound-dai-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts index f09d60bbe6c4f..a608a219543e5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts @@ -241,8 +241,8 @@ rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; - interrupt-parent = <&gpio2>; - interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&gpio0>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; 
#clock-cells = <1>; clock-output-names = "xin32k", "rk805-clkout2"; gpio-controller; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 734f87db4d115..73618df7a8898 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -793,6 +793,7 @@ dma-names = "tx"; pinctrl-names = "default"; pinctrl-0 = <&spdif_tx>; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -804,6 +805,7 @@ clocks = <&cru SCLK_I2S_2CH>, <&cru HCLK_I2S_2CH>; dmas = <&dmac_bus 6>, <&dmac_bus 7>; dma-names = "tx", "rx"; + #sound-dai-cells = <0>; status = "disabled"; }; @@ -817,6 +819,7 @@ dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&i2s_8ch_bus>; + #sound-dai-cells = <0>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 789fd0dcc88ba..3cd63d1e8f15b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -450,7 +450,7 @@ ap_i2c_audio: &i2c8 { dlg,btn-cfg = <50>; dlg,mic-det-thr = <500>; dlg,jack-ins-deb = <20>; - dlg,jack-det-rate = "32ms_64ms"; + dlg,jack-det-rate = "32_64"; dlg,jack-rem-deb = <1>; dlg,a-d-btn-thr = <0xa>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 26322a358d919..b908ce006c26e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -289,7 +289,7 @@ regulator-name = "vdd_gpu"; regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; regulator-ramp-delay = <6001>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 1a604429fb266..e74871491ef56 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -444,6 +444,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index b4f22d95ac0e1..e80caa36f8e44 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -435,6 +435,7 @@ &sdmmc { bus-width = <4>; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index b8e15b76a8a6c..2e7512676b7e1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -383,6 +383,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; sd-uhs-sdr104; vmmc-supply = <&vcc_3v3_s3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi index aebe1fedd2d81..615094bb8ba38 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-tiger.dtsi @@ -344,6 +344,11 @@ }; }; +&pwm0 { + pinctrl-0 = <&pwm0m1_pins>; + pinctrl-names = "default"; +}; + &saradc { vref-supply = <&vcc_1v8_s0>; status = "okay"; diff --git 
a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index 3b2ec1d0c5421..074c316a9a694 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -288,9 +288,9 @@ pinctrl-0 = <&i2c7m0_xfer>; status = "okay"; - es8316: audio-codec@11 { + es8316: audio-codec@10 { compatible = "everest,es8316"; - reg = <0x11>; + reg = <0x10>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts index 8e2a07612d173..3b9a349362db4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts @@ -366,6 +366,7 @@ bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; + cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>; disable-wp; max-frequency = <150000000>; no-sdio; @@ -393,6 +394,7 @@ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 57a9abe78ee41..2c7bf4da0b800 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1036,6 +1036,7 @@ CONFIG_SND_AUDIO_GRAPH_CARD2=m CONFIG_HID_MULTITOUCH=m CONFIG_I2C_HID_ACPI=m CONFIG_I2C_HID_OF=m +CONFIG_I2C_HID_OF_ELAN=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d1030bc52564e..d283d281d28e8 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4690c219bfa4d..63432a33ec49a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -647,8 +647,9 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) - mr. r4,r26 /* In some cases the loader may */ - beq 9f /* have already put us at zero */ + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. 
*/ diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 85050be08a23d..72b12bc10f90b 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -27,6 +27,7 @@ #include #include #include /* _end */ +#include #include #include #include @@ -317,6 +318,16 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); +#ifdef CONFIG_PPC_PSERIES + /* + * This must be done after other CPUs have shut down, otherwise they + * could execute the 'scv' instruction, which is not supported with + * reloc disabled (see configure_exceptions()). + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); +#endif + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 096d09ed89f67..431be156ca9bb 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -61,11 +61,3 @@ void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } else xics_kexec_teardown_cpu(secondary); } - -void pseries_machine_kexec(struct kimage *image) -{ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) - pseries_disable_reloc_on_exc(); - - default_machine_kexec(image); -} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index bba4ad192b0fe..3968a6970fa81 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -38,7 +38,6 @@ static inline void smp_init_pseries(void) { } #endif extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); -void pseries_machine_kexec(struct kimage *image); extern void pSeries_final_fixup(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 284a6fa04b0c2..b10a253252387 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void) { void (*ctor)(void *) = get_dtl_cache_ctor(); - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, ctor); + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); if (!dtl_cache) { pr_warn("Failed to create dispatch trace log buffer cache\n"); pr_warn("Stolen time statistics will be unreliable\n"); @@ -1159,7 +1159,6 @@ define_machine(pseries) { .machine_check_exception = pSeries_machine_check_exception, .machine_check_log_err = pSeries_machine_check_log_err, #ifdef CONFIG_KEXEC_CORE - .machine_kexec = pseries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts index 8df4cf3656f2c..a7d753b6fdfd1 100644 --- a/arch/riscv/boot/dts/canaan/canaan_kd233.dts +++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts @@ -15,6 +15,10 @@ model = "Kendryte KD233"; compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -46,7 +50,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = , @@ -118,6 +121,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = 
"okay"; }; &spi0 { @@ -125,6 +129,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "canaan,kd233-tft", "ilitek,ili9341"; diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi index f87c5164d9cf6..4f5d40fa1e77c 100644 --- a/arch/riscv/boot/dts/canaan/k210.dtsi +++ b/arch/riscv/boot/dts/canaan/k210.dtsi @@ -16,13 +16,6 @@ #size-cells = <1>; compatible = "canaan,kendryte-k210"; - aliases { - serial0 = &uarths0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - }; - /* * The K210 has an sv39 MMU following the privileged specification v1.9. * Since this is a non-ratified draft specification, the kernel does not @@ -137,6 +130,7 @@ reg = <0x38000000 0x1000>; interrupts = <33>; clocks = <&sysclk K210_CLK_CPU>; + status = "disabled"; }; gpio0: gpio-controller@38001000 { @@ -152,6 +146,7 @@ <62>, <63>, <64>, <65>; gpio-controller; ngpios = <32>; + status = "disabled"; }; dmac0: dma-controller@50000000 { @@ -187,6 +182,7 @@ <&sysclk K210_CLK_GPIO>; clock-names = "bus", "db"; resets = <&sysrst K210_RST_GPIO>; + status = "disabled"; gpio1_0: gpio-port@0 { #gpio-cells = <2>; @@ -214,6 +210,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart2: serial@50220000 { @@ -230,6 +227,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; uart3: serial@50230000 { @@ -246,6 +244,7 @@ dsr-override; cts-override; ri-override; + status = "disabled"; }; spi2: spi@50240000 { @@ -259,6 +258,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ssi_clk", "pclk"; resets = <&sysrst K210_RST_SPI2>; + status = "disabled"; }; i2s0: i2s@50250000 { @@ -268,6 +268,7 @@ clocks = <&sysclk K210_CLK_I2S0>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S0>; + status = "disabled"; }; i2s1: i2s@50260000 { @@ -277,6 +278,7 @@ clocks = <&sysclk K210_CLK_I2S1>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S1>; + status = "disabled"; }; i2s2: i2s@50270000 { @@ -286,6 +288,7 @@ clocks = <&sysclk K210_CLK_I2S2>; clock-names = "i2sclk"; resets = <&sysrst K210_RST_I2S2>; + status = "disabled"; }; i2c0: i2c@50280000 { @@ -296,6 +299,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C0>; + status = "disabled"; }; i2c1: i2c@50290000 { @@ -306,6 +310,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C1>; + status = "disabled"; }; i2c2: i2c@502a0000 { @@ -316,6 +321,7 @@ <&sysclk K210_CLK_APB0>; clock-names = "ref", "pclk"; resets = <&sysrst K210_RST_I2C2>; + status = "disabled"; }; fpioa: pinmux@502b0000 { @@ -464,6 +470,7 @@ reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi1: spi@53000000 { @@ -479,6 +486,7 @@ reset-names = "spi"; num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; spi3: spi@54000000 { @@ -495,6 +503,7 @@ num-cs = <4>; reg-io-width = <4>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts index 396c8ca4d24db..5734cc03753b1 100644 --- a/arch/riscv/boot/dts/canaan/k210_generic.dts +++ b/arch/riscv/boot/dts/canaan/k210_generic.dts @@ -15,6 +15,10 @@ model = "Kendryte K210 generic"; compatible = "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -24,7 +28,6 @@ &fpioa { pinctrl-0 = <&jtag_pins>; pinctrl-names = "default"; - status = "okay"; jtag_pins: jtag-pinmux { 
pinmux = , diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 6d25bf07481a6..2ab376d609d25 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-bit", "sipeed,maix-bitm", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -58,7 +62,6 @@ &fpioa { pinctrl-names = "default"; pinctrl-0 = <&jtag_pinctrl>; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = , @@ -156,6 +159,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -170,6 +174,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -199,6 +204,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index f4f4d8d5e8b88..d98e20775c073 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -17,6 +17,10 @@ compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -63,7 +67,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = , @@ -159,6 +162,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -173,6 +177,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index 0d86df47e1ed3..79ecd549700a0 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -16,6 +16,10 @@ model = "SiPeed MAIX GO"; compatible = "sipeed,maix-go", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -69,7 +73,6 @@ &fpioa { pinctrl-0 = <&jtag_pinctrl>; pinctrl-names = "default"; - status = "okay"; jtag_pinctrl: jtag-pinmux { pinmux = , @@ -167,6 +170,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -181,6 +185,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -209,6 +214,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 5c05c498e2b88..019c03ae51f6c 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -15,6 +15,10 @@ model = "SiPeed MAIXDUINO"; compatible = "sipeed,maixduino", "canaan,kendryte-k210"; + aliases { + serial0 = &uarths0; + }; + chosen { bootargs = "earlycon console=ttySIF0"; stdout-path = "serial0:115200n8"; @@ -39,8 +43,6 @@ }; &fpioa { - status = "okay"; - uarths_pinctrl: uarths-pinmux { pinmux = , /* Header 
"0" */ ; /* Header "1" */ @@ -132,6 +134,7 @@ #sound-dai-cells = <1>; pinctrl-0 = <&i2s0_pinctrl>; pinctrl-names = "default"; + status = "okay"; }; &i2c1 { @@ -146,6 +149,7 @@ pinctrl-names = "default"; num-cs = <1>; cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>; + status = "okay"; panel@0 { compatible = "sitronix,st7789v"; @@ -174,6 +178,8 @@ }; &spi3 { + status = "okay"; + flash@0 { compatible = "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi index 8ff6ea64f0489..68d16717db8cd 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-common.dtsi @@ -244,7 +244,7 @@ regulator-boot-on; regulator-always-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; regulator-name = "emmc_vdd"; }; }; diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ed9cad20c039d..3c830a6f7ef46 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -121,20 +121,12 @@ static void machine_kexec_mask_interrupts(void) for_each_irq_desc(i, desc) { struct irq_chip *chip; - int ret; chip = irq_desc_get_chip(desc); if (!chip) continue; - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) chip->irq_eoi(&desc->irq_data); if (chip->irq_mask) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0d3f00eb0baee..10e311b2759d3 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -32,6 +32,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int graph_idx = 0; int level = 0; if (regs) { @@ -68,7 +69,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, pc = regs->ra; } else { fp = frame->fp; - pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, &frame->ra); if (pc == (unsigned long)ret_from_exception) { if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 04db1f993c475..bcf41d6e0df0e 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -327,7 +327,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8c4adece89112..f3602414a9614 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -601,17 +601,16 @@ CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m 
CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 6dd11d3b6aaa6..d0d8925fdf09f 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -592,17 +592,16 @@ CONFIG_WATCHDOG_CORE=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=m +CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB=y # CONFIG_FB_DEVICE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_MLX4_INFINIBAND=m CONFIG_MLX5_INFINIBAND=m -CONFIG_SYNC_FILE=y CONFIG_VFIO=m CONFIG_VFIO_PCI=m CONFIG_MLX5_VFIO_PCI=m diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888fc..9281063636a73 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -427,6 +427,7 @@ struct kvm_vcpu_stat { u64 instruction_io_other; u64 instruction_lpsw; u64 instruction_lpswe; + u64 instruction_lpswey; u64 instruction_pfmf; u64 instruction_ptff; u64 instruction_sck; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 82e9631cd9efb..54b5b2565df8d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_io_other), STATS_DESC_COUNTER(VCPU, instruction_lpsw), STATS_DESC_COUNTER(VCPU, instruction_lpswe), + STATS_DESC_COUNTER(VCPU, instruction_lpswey), STATS_DESC_COUNTER(VCPU, instruction_pfmf), STATS_DESC_COUNTER(VCPU, instruction_ptff), STATS_DESC_COUNTER(VCPU, instruction_sck), diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 111eb5c747840..bf8534218af3d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } +static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar) +{ + u32 base1 = vcpu->arch.sie_block->ipb >> 28; + s64 disp1; + + /* The displacement is a 20bit _SIGNED_ value */ + disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19); + + if (ar) + *ar = base1; + + return (base1 ? 
vcpu->run->s.regs.gprs[base1] : 0) + disp1; +} + static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, u64 *address1, u64 *address2, u8 *ar_b1, u8 *ar_b2) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be19cc9d73c1..1a49b89706f86 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) return 0; } +static int handle_lpswey(struct kvm_vcpu *vcpu) +{ + psw_t new_psw; + u64 addr; + int rc; + u8 ar; + + vcpu->stat.instruction_lpswey++; + + if (!test_kvm_facility(vcpu->kvm, 193)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + addr = kvm_s390_get_base_disp_siy(vcpu, &ar); + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + return 0; +} + static int handle_stidp(struct kvm_vcpu *vcpu) { u64 stidp_data = vcpu->kvm->arch.model.cpuid; @@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) case 0x61: case 0x62: return handle_ri(vcpu); + case 0x71: + return handle_lpswey(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index abb629d7e1319..7e3e767ab87d6 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm) void crst_table_free(struct mm_struct *mm, unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } @@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val) static void base_crst_free(unsigned long *table) { + if (!table) + return; pagetable_free(virt_to_ptdesc(table)); } diff --git a/arch/x86/configs/rock-dbg_defconfig b/arch/x86/configs/rock-dbg_defconfig new file mode 100644 index 0000000000000..0ad80a8c8eab0 --- /dev/null +++ b/arch/x86/configs/rock-dbg_defconfig @@ -0,0 +1,551 @@ +CONFIG_LOCALVERSION="-kfd" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y +# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_CPU_ISOLATION is not set +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_NUMA_BALANCING=y +CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_USERFAULTFD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_SMP=y +# CONFIG_RETPOLINE is not set +CONFIG_X86_INTEL_LPSS=y +CONFIG_IOSF_MBI_DEBUG=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PROCESSOR_SELECT=y +CONFIG_GART_IOMMU=y +CONFIG_NR_CPUS=256 +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_I8K=m +CONFIG_MICROCODE_AMD=y +CONFIG_MICROCODE_OLD_INTERFACE=y 
+CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m +# CONFIG_X86_5LEVEL is not set +CONFIG_NUMA=y +CONFIG_ARCH_MEMORY_PROBE=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=1 +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EFI_MIXED=y +CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y +CONFIG_CRASH_DUMP=y +CONFIG_KEXEC_JUMP=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_LEGACY_VSYSCALL_EMULATE=y +CONFIG_HIBERNATION=y +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_DEBUG=y +CONFIG_PM_ADVANCED_DEBUG=y +CONFIG_PM_TRACE_RTC=y +CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y +CONFIG_ACPI_EC_DEBUGFS=m +CONFIG_ACPI_VIDEO=m +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_PROCESSOR_AGGREGATOR=m +CONFIG_ACPI_PCI_SLOT=y +CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_ACPI_SBS=m +CONFIG_ACPI_BGRT=y +CONFIG_ACPI_HMAT=y +CONFIG_ACPI_APEI=y +CONFIG_ACPI_APEI_GHES=y +CONFIG_ACPI_APEI_PCIEAER=y +CONFIG_ACPI_APEI_MEMORY_FAILURE=y +CONFIG_ACPI_APEI_EINJ=m +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_ACPI_CPUFREQ_CPB is not set +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_IA32_EMULATION=y +CONFIG_JUMP_LABEL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA512=y +CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_BINFMT_MISC=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_MEMORY_FAILURE=y +CONFIG_HWPOISON_INJECT=m +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CLEANCACHE=y +CONFIG_FRONTSWAP=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ZSWAP=y +CONFIG_ZSMALLOC=y +CONFIG_ZONE_DEVICE=y +CONFIG_DEVICE_PRIVATE=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_XFRM_USER=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_INET_RAW_DIAG=m +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_ZONES=y +# CONFIG_NF_CONNTRACK_PROCFS is not set +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_CT_NETLINK_HELPER=m +CONFIG_NETFILTER_NETLINK_GLUE_CT=y +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NFT_CT=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_CONNLIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REJECT=m +CONFIG_NFT_XFRM=m +CONFIG_NF_DUP_NETDEV=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m 
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LED=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_L2TP=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_DUP_IPV4=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_SYNPROXY=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y +CONFIG_NET_SCHED=y +CONFIG_NET_EMATCH=y +CONFIG_NET_CLS_ACT=y +CONFIG_NETLINK_DIAG=y +CONFIG_HAMRADIO=y +CONFIG_RFKILL=y +CONFIG_RFKILL_INPUT=y +CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +CONFIG_HOTPLUG_PCI_PCIE=y +CONFIG_PCIEAER=y +CONFIG_PCI_REALLOC_ENABLE_AUTO=y +CONFIG_PCI_STUB=y +CONFIG_PCI_IOV=y +CONFIG_PCI_P2PDMA=y +CONFIG_HOTPLUG_PCI=y +CONFIG_RAPIDIO=y +CONFIG_RAPIDIO_DMA_ENGINE=y +CONFIG_UEVENT_HELPER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 
+CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_EFI_VARS=y +CONFIG_PARPORT=y +CONFIG_PARPORT_PC=y +CONFIG_PARPORT_SERIAL=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_NVME=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_SATA_SIL24=y +CONFIG_SATA_SX4=y +CONFIG_ATA_PIIX=y +CONFIG_SATA_PROMISE=y +CONFIG_SATA_SIL=y +CONFIG_PATA_AMD=y +CONFIG_PATA_ATIIXP=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_SERVERWORKS=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_NETDEVICES=y +CONFIG_MACVLAN=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VETH=y +CONFIG_ALX=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_CAVIUM_PTP=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_IGB=y +CONFIG_IGBVF=y +CONFIG_IXGB=y +CONFIG_IXGBE=y +CONFIG_I40E=y +CONFIG_SKY2=y +CONFIG_FORCEDETH=y +CONFIG_8139CP=y +CONFIG_8139TOO=y +CONFIG_R8169=y +CONFIG_WIZNET_W5100=y +CONFIG_WIZNET_W5300=y +CONFIG_FDDI=y +CONFIG_AMD_PHY=y +CONFIG_USB_NET_DRIVERS=m +CONFIG_USB_RTL8152=m +CONFIG_USB_USBNET=m +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_NET_ZAURUS is not set +CONFIG_INPUT_SPARSEKMAP=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_EVDEV=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +# CONFIG_SERIAL_8250_MID is not set +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C_I801=y +CONFIG_I2C_PIIX4=m +CONFIG_SENSORS_K10TEMP=m +CONFIG_WATCHDOG=y +CONFIG_RC_CORE=y +CONFIG_RC_DECODERS=y +CONFIG_IR_NEC_DECODER=y +CONFIG_IR_RC5_DECODER=y +CONFIG_IR_RC6_DECODER=y +CONFIG_IR_JVC_DECODER=y +CONFIG_IR_SONY_DECODER=y +CONFIG_IR_SANYO_DECODER=y +CONFIG_IR_SHARP_DECODER=y +CONFIG_IR_MCE_KBD_DECODER=y +CONFIG_IR_XMP_DECODER=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=m +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +CONFIG_HSA_AMD=y +CONFIG_HSA_AMD_P2P=y +CONFIG_DRM_AST=m +CONFIG_FB=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_HID_BATTERY_STRENGTH=y +CONFIG_HIDRAW=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_KYE=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PLANTRONICS=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_DMABUF_MOVE_NOTIFY=y +# CONFIG_X86_PLATFORM_DEVICES is not set +CONFIG_AMD_IOMMU=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON is not set +CONFIG_DAX=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y 
+CONFIG_EXT4_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_XFS_RT=y +CONFIG_XFS_WARN=y +CONFIG_FANOTIFY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_OVERLAY_FS=y +CONFIG_FSCACHE=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=y +CONFIG_NTFS_RW=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=y +# CONFIG_EFIVAR_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_CIFS=y +# CONFIG_CIFS_STATS2 is not set +CONFIG_CIFS_FSCACHE=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" +CONFIG_CRYPTO_AUTHENC=y +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_MD4=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_DES=y +CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_KGDB=y +CONFIG_KGDB_LOW_LEVEL_TRAP=y +CONFIG_KGDB_KDB=y +CONFIG_KDB_KEYBOARD=y +CONFIG_DEBUG_RODATA_TEST=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_RCU_TRACE is not set +CONFIG_FUNCTION_PROFILER=y +CONFIG_STACK_TRACER=y +CONFIG_SCHED_TRACER=y +CONFIG_HWLAT_TRACER=y +CONFIG_MMIOTRACE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_HIST_TRIGGERS=y +# CONFIG_STRICT_DEVMEM is not set +# CONFIG_X86_VERBOSE_BOOTUP is not set +CONFIG_IO_DELAY_0XED=y +# CONFIG_RUNTIME_TESTING_MENU is not set +CONFIG_MEMTEST=y diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 11c9b8efdc4cc..ed0a5f2dc1297 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -89,10 +89,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld - IBRS_ENTER - UNTRAIN_RET - CLEAR_BRANCH_HISTORY - /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -116,6 +112,16 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) jnz .Lsysenter_fix_flags .Lsysenter_flags_fixed: + /* + * CPU bugs mitigations mechanisms can call other functions. They + * should be invoked after making sure TF is cleared because + * single-step is ignored only for instructions inside the + * entry_SYSENTER_compat function. 
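+	 * The helpers these macros expand to live outside that function, so
+	 * if TF were still set here, those calls would themselves be
+	 * single-stepped.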
+	 */
+	IBRS_ENTER
+	UNTRAIN_RET
+	CLEAR_BRANCH_HISTORY
+
 	movq	%rsp, %rdi
 	call	do_SYSENTER_32
 	jmp	sysret32_from_system_call
diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h
index 08010dbf5e09a..df275d5547884 100644
--- a/arch/xtensa/include/asm/current.h
+++ b/arch/xtensa/include/asm/current.h
@@ -19,7 +19,7 @@
 struct task_struct;
-static inline struct task_struct *get_current(void)
+static __always_inline struct task_struct *get_current(void)
 {
 	return current_thread_info()->task;
 }
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 326db1c1d5d8d..e0dffcc43b9e6 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -91,7 +91,7 @@ struct thread_info {
 }
 /* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
+static __always_inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
 	__asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t"
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index bd6a7857ce058..831fa4a121598 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -16,7 +16,6 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/sched.h>       /* need_resched() */
-#include <linux/sort.h>
 #include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu.h>
@@ -386,25 +385,24 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
 }
-static int acpi_cst_latency_cmp(const void *a, const void *b)
+static void acpi_cst_latency_sort(struct acpi_processor_cx *states, size_t length)
 {
-	const struct acpi_processor_cx *x = a, *y = b;
+	int i, j, k;
-	if (!(x->valid && y->valid))
-		return 0;
-	if (x->latency > y->latency)
-		return 1;
-	if (x->latency < y->latency)
-		return -1;
-	return 0;
-}
-static void acpi_cst_latency_swap(void *a, void *b, int n)
-{
-	struct acpi_processor_cx *x = a, *y = b;
+	for (i = 1; i < length; i++) {
+		if (!states[i].valid)
+			continue;
-	if (!(x->valid && y->valid))
-		return;
-	swap(x->latency, y->latency);
+		for (j = i - 1, k = i; j >= 0; j--) {
+			if (!states[j].valid)
+				continue;
+
+			if (states[j].latency > states[k].latency)
+				swap(states[j].latency, states[k].latency);
+
+			k = j;
+		}
+	}
 }
 static int acpi_processor_power_verify(struct acpi_processor *pr)
@@ -449,10 +447,7 @@ static int acpi_processor_power_verify(struct acpi_processor *pr)
 	if (buggy_latency) {
 		pr_notice("FW issue: working around C-state latencies out of order\n");
-		sort(&pr->power.states[1], max_cstate,
-		     sizeof(struct acpi_processor_cx),
-		     acpi_cst_latency_cmp,
-		     acpi_cst_latency_swap);
+		acpi_cst_latency_sort(&pr->power.states[1], max_cstate);
 	}
 	lapic_timer_propagate_broadcast(pr);
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
new file mode 100644
index 0000000000000..664280f23bee1
--- /dev/null
+++ b/drivers/block/umem.c
@@ -0,0 +1,1130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mm.c - Micro Memory(tm) PCI memory board block device driver - v2.3
+ *
+ * (C) 2001 San Mehat <nettwerk@valinux.com>
+ * (C) 2001 Johannes Erdfelt <johannes@erdfelt.com>
+ * (C) 2001 NeilBrown <neilb@cse.unsw.edu.au>
+ *
+ * This driver for the Micro Memory PCI Memory Module with Battery Backup
+ * is Copyright Micro Memory Inc 2001-2002.  All rights reserved.
+ *
+ * This driver provides a standard block device interface for Micro Memory(tm)
+ * PCI based RAM boards.
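+ *
+ * Each probed card is exposed as a gendisk named umem<a..> (see mm_init()
+ * below); with MM_SHIFT = 6 each disk carries up to 64 minors, i.e.
+ * device nodes /dev/umema, /dev/umema1, ... for the first card.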
+ * 10/05/01: Phap Nguyen - Rebuilt the driver
+ * 10/22/01: Phap Nguyen - v2.1 Added disk partitioning
+ * 29oct2001:NeilBrown	- Use make_request_fn instead of request_fn
+ *			- use standard disk partitioning (so fdisk works).
+ * 08nov2001:NeilBrown	- change driver name from "mm" to "umem"
+ *			- incorporate into main kernel
+ * 08apr2002:NeilBrown	- Move some of interrupt handling to tasklet
+ *			- use spin_lock_bh instead of _irq
+ *			- Never block on make_request.  queue
+ *			  bh's instead.
+ *			- unregister umem from devfs at mod unload
+ *			- Change version to 2.3
+ * 07Nov2001:Phap Nguyen - Select pci read command: 06, 12, 15 (Decimal)
+ * 07Jan2002: P. Nguyen - Used PCI Memory Write & Invalidate for DMA
+ * 15May2002:NeilBrown - convert to bio for 2.5
+ * 17May2002:NeilBrown - remove init_mem initialisation.  Instead detect
+ *			- a sequence of writes that cover the card, and
+ *			- set initialised bit then.
+ */
+
+#undef DEBUG	/* #define DEBUG if you want debugging info (pr_debug) */
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/fcntl.h>        /* O_ACCMODE */
+#include <linux/hdreg.h>  /* HDIO_GETGEO */
+
+#include "umem.h"
+
+#include <linux/uaccess.h>
+#include <asm/io.h>
+
+#define MM_MAXCARDS 4
+#define MM_RAHEAD 2      /* two sectors */
+#define MM_BLKSIZE 1024  /* 1k blocks */
+#define MM_HARDSECT 512  /* 512-byte hardware sectors */
+#define MM_SHIFT 6       /* max 64 partitions on 4 cards  */
+
+/*
+ * Version Information
+ */
+
+#define DRIVER_NAME	"umem"
+#define DRIVER_VERSION	"v2.3"
+#define DRIVER_AUTHOR	"San Mehat, Johannes Erdfelt, NeilBrown"
+#define DRIVER_DESC	"Micro Memory(tm) PCI memory board block driver"
+
+static int debug;
+/* #define HW_TRACE(x)     writeb(x,cards[0].csr_remap + MEMCTRLSTATUS_MAGIC) */
+#define HW_TRACE(x)
+
+#define DEBUG_LED_ON_TRANSFER	0x01
+#define DEBUG_BATTERY_POLLING	0x02
+
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug bitmask");
+
+static int pci_read_cmd = 0x0C;		/* Read Multiple */
+module_param(pci_read_cmd, int, 0);
+MODULE_PARM_DESC(pci_read_cmd, "PCI read command");
+
+static int pci_write_cmd = 0x0F;	/* Write and Invalidate */
+module_param(pci_write_cmd, int, 0);
+MODULE_PARM_DESC(pci_write_cmd, "PCI write command");
+
+static int pci_cmds;
+
+static int major_nr;
+
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+
+struct cardinfo {
+	struct pci_dev	*dev;
+
+	unsigned char	__iomem *csr_remap;
+	unsigned int	mm_size;  /* size in kbytes */
+
+	unsigned int	init_size; /* initial segment, in sectors,
+				    * that we know to
+				    * have been written
+				    */
+	struct bio	*bio, *currentbio, **biotail;
+	struct bvec_iter current_iter;
+
+	struct request_queue *queue;
+
+	struct mm_page {
+		dma_addr_t		page_dma;
+		struct mm_dma_desc	*desc;
+		int			cnt, headcnt;
+		struct bio		*bio, **biotail;
+		struct bvec_iter	iter;
+	} mm_pages[2];
+#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
+
+	int  Active, Ready;
+
+	struct tasklet_struct	tasklet;
+	unsigned int dma_status;
+
+	struct {
+		int		good;
+		int		warned;
+		unsigned long	last_change;
+	} battery[2];
+
+	spinlock_t	lock;
+	int		check_batteries;
+
+	int		flags;
+};
+
+static struct cardinfo cards[MM_MAXCARDS];
+static struct timer_list battery_timer;
+
+static int num_cards;
+
+static struct gendisk *mm_gendisk[MM_MAXCARDS];
+
+static void check_batteries(struct cardinfo *card);
+
+static int get_userbit(struct cardinfo *card, int bit)
+{
+	unsigned char led;
+
+	led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+	return led & bit;
+}
+
+static int set_userbit(struct cardinfo *card, int bit,
+		       unsigned char state)
+{
+	unsigned char led;
+
+	led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+	if (state)
+		led |= bit;
+	else
+		led &= ~bit;
+	writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
+
+	return 0;
+}
+
+/*
+ * NOTE: For the power LED, use the LED_POWER_* macros since they differ
+ */
+static void set_led(struct cardinfo *card, int shift, unsigned char state)
+{
+	unsigned char led;
+
+	led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+	if (state == LED_FLIP)
+		led ^= (1<<shift);
+	else {
+		led &= ~(0x03 << shift);
+		led |= (state << shift);
+	}
+	writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
+
+}
+
+#ifdef MM_DIAG
+static void dump_regs(struct cardinfo *card)
+{
+	unsigned char *p;
+	int i, i1;
+
+	p = card->csr_remap;
+	for (i = 0; i < 8; i++) {
+		printk(KERN_DEBUG "%p   ", p);
+
+		for (i1 = 0; i1 < 16; i1++)
+			printk("%02x ", *p++);
+
+		printk("\n");
+	}
+}
+#endif
+
+static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
+{
+	dev_printk(KERN_DEBUG, &card->dev->dev, "DMAstat - ");
+	if (dmastat & DMASCR_ANY_ERR)
+		printk(KERN_CONT "ANY_ERR ");
+	if (dmastat & DMASCR_MBE_ERR)
+		printk(KERN_CONT "MBE_ERR ");
+	if (dmastat & DMASCR_PARITY_ERR_REP)
+		printk(KERN_CONT "PARITY_ERR_REP ");
+	if (dmastat & DMASCR_PARITY_ERR_DET)
+		printk(KERN_CONT "PARITY_ERR_DET ");
+	if (dmastat & DMASCR_SYSTEM_ERR_SIG)
+		printk(KERN_CONT "SYSTEM_ERR_SIG ");
+	if (dmastat & DMASCR_TARGET_ABT)
+		printk(KERN_CONT "TARGET_ABT ");
+	if (dmastat & DMASCR_MASTER_ABT)
+		printk(KERN_CONT "MASTER_ABT ");
+	if (dmastat & DMASCR_CHAIN_COMPLETE)
+		printk(KERN_CONT "CHAIN_COMPLETE ");
+	if (dmastat & DMASCR_DMA_COMPLETE)
+		printk(KERN_CONT "DMA_COMPLETE ");
+	printk("\n");
+}
+
+/*
+ * Theory of request handling
+ *
+ * Each bio is assigned to one mm_dma_desc - which may not be enough FIXME
+ * We have two pages of mm_dma_desc, holding about 64 descriptors
+ * each.  These are allocated at init time.
+ * One page is "Ready" and is either full, or can have requests added.
+ * The other page might be "Active", meaning DMA is currently happening
+ * on it.
+ *
+ * Whenever IO on the active page completes, the Ready page is activated
+ * and the ex-Active page is cleaned out and made Ready.
+ * Otherwise the Ready page is only activated when it becomes full.
+ *
+ * If a request arrives while both pages are full, it is queued, and b_rdev is
+ * overloaded to record whether it was a read or a write.
+ *
+ * The interrupt handler only polls the device to clear the interrupt.
+ * The processing of the result is done in a tasklet.
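+ *
+ * A rough sketch of the page life-cycle this implies (Ready and Active
+ * index mm_pages[]; Active == -1 means no DMA is in flight):
+ *
+ *   add_bio():      append one descriptor for the current bio segment
+ *                   to mm_pages[Ready]
+ *   activate():     if Active == -1 and Ready is non-empty:
+ *                       Active = Ready; Ready = 1 - Ready; mm_start_io()
+ *   process_page(): retire completed descriptors; once the Active page
+ *                   drains: reset_page(), Active = -1, activate() again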
+ */ + +static void mm_start_io(struct cardinfo *card) +{ + /* we have the lock, we know there is + * no IO active, and we know that card->Active + * is set + */ + struct mm_dma_desc *desc; + struct mm_page *page; + int offset; + + /* make the last descriptor end the chain */ + page = &card->mm_pages[card->Active]; + pr_debug("start_io: %d %d->%d\n", + card->Active, page->headcnt, page->cnt - 1); + desc = &page->desc[page->cnt-1]; + + desc->control_bits |= cpu_to_le32(DMASCR_CHAIN_COMP_EN); + desc->control_bits &= ~cpu_to_le32(DMASCR_CHAIN_EN); + desc->sem_control_bits = desc->control_bits; + + + if (debug & DEBUG_LED_ON_TRANSFER) + set_led(card, LED_REMOVE, LED_ON); + + desc = &page->desc[page->headcnt]; + writel(0, card->csr_remap + DMA_PCI_ADDR); + writel(0, card->csr_remap + DMA_PCI_ADDR + 4); + + writel(0, card->csr_remap + DMA_LOCAL_ADDR); + writel(0, card->csr_remap + DMA_LOCAL_ADDR + 4); + + writel(0, card->csr_remap + DMA_TRANSFER_SIZE); + writel(0, card->csr_remap + DMA_TRANSFER_SIZE + 4); + + writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR); + writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR + 4); + + offset = ((char *)desc) - ((char *)page->desc); + writel(cpu_to_le32((page->page_dma+offset) & 0xffffffff), + card->csr_remap + DMA_DESCRIPTOR_ADDR); + /* Force the value to u64 before shifting otherwise >> 32 is undefined C + * and on some ports will do nothing ! */ + writel(cpu_to_le32(((u64)page->page_dma)>>32), + card->csr_remap + DMA_DESCRIPTOR_ADDR + 4); + + /* Go, go, go */ + writel(cpu_to_le32(DMASCR_GO | DMASCR_CHAIN_EN | pci_cmds), + card->csr_remap + DMA_STATUS_CTRL); +} + +static int add_bio(struct cardinfo *card); + +static void activate(struct cardinfo *card) +{ + /* if No page is Active, and Ready is + * not empty, then switch Ready page + * to active and start IO. + * Then add any bh's that are available to Ready + */ + + do { + while (add_bio(card)) + ; + + if (card->Active == -1 && + card->mm_pages[card->Ready].cnt > 0) { + card->Active = card->Ready; + card->Ready = 1-card->Ready; + mm_start_io(card); + } + + } while (card->Active == -1 && add_bio(card)); +} + +static inline void reset_page(struct mm_page *page) +{ + page->cnt = 0; + page->headcnt = 0; + page->bio = NULL; + page->biotail = &page->bio; +} + +/* + * If there is room on Ready page, take + * one bh off list and add it. + * return 1 if there was room, else 0. + */ +static int add_bio(struct cardinfo *card) +{ + struct mm_page *p; + struct mm_dma_desc *desc; + dma_addr_t dma_handle; + int offset; + struct bio *bio; + struct bio_vec vec; + + bio = card->currentbio; + if (!bio && card->bio) { + card->currentbio = card->bio; + card->current_iter = card->bio->bi_iter; + card->bio = card->bio->bi_next; + if (card->bio == NULL) + card->biotail = &card->bio; + card->currentbio->bi_next = NULL; + return 1; + } + if (!bio) + return 0; + + if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE) + return 0; + + vec = bio_iter_iovec(bio, card->current_iter); + + dma_handle = dma_map_page(&card->dev->dev, + vec.bv_page, + vec.bv_offset, + vec.bv_len, + bio_op(bio) == REQ_OP_READ ? 
+ DMA_FROM_DEVICE : DMA_TO_DEVICE); + + p = &card->mm_pages[card->Ready]; + desc = &p->desc[p->cnt]; + p->cnt++; + if (p->bio == NULL) + p->iter = card->current_iter; + if ((p->biotail) != &bio->bi_next) { + *(p->biotail) = bio; + p->biotail = &(bio->bi_next); + bio->bi_next = NULL; + } + + desc->data_dma_handle = dma_handle; + + desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle); + desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9); + desc->transfer_size = cpu_to_le32(vec.bv_len); + offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc)); + desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset)); + desc->zero1 = desc->zero2 = 0; + offset = (((char *)(desc+1)) - ((char *)p->desc)); + desc->next_desc_addr = cpu_to_le64(p->page_dma+offset); + desc->control_bits = cpu_to_le32(DMASCR_GO|DMASCR_ERR_INT_EN| + DMASCR_PARITY_INT_EN| + DMASCR_CHAIN_EN | + DMASCR_SEM_EN | + pci_cmds); + if (bio_op(bio) == REQ_OP_WRITE) + desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ); + desc->sem_control_bits = desc->control_bits; + + + bio_advance_iter(bio, &card->current_iter, vec.bv_len); + if (!card->current_iter.bi_size) + card->currentbio = NULL; + + return 1; +} + +static void process_page(unsigned long data) +{ + /* check if any of the requests in the page are DMA_COMPLETE, + * and deal with them appropriately. + * If we find a descriptor without DMA_COMPLETE in the semaphore, then + * dma must have hit an error on that descriptor, so use dma_status + * instead and assume that all following descriptors must be re-tried. + */ + struct mm_page *page; + struct bio *return_bio = NULL; + struct cardinfo *card = (struct cardinfo *)data; + unsigned int dma_status = card->dma_status; + + spin_lock(&card->lock); + if (card->Active < 0) + goto out_unlock; + page = &card->mm_pages[card->Active]; + + while (page->headcnt < page->cnt) { + struct bio *bio = page->bio; + struct mm_dma_desc *desc = &page->desc[page->headcnt]; + int control = le32_to_cpu(desc->sem_control_bits); + int last = 0; + struct bio_vec vec; + + if (!(control & DMASCR_DMA_COMPLETE)) { + control = dma_status; + last = 1; + } + + page->headcnt++; + vec = bio_iter_iovec(bio, page->iter); + bio_advance_iter(bio, &page->iter, vec.bv_len); + + if (!page->iter.bi_size) { + page->bio = bio->bi_next; + if (page->bio) + page->iter = page->bio->bi_iter; + } + + dma_unmap_page(&card->dev->dev, desc->data_dma_handle, + vec.bv_len, + (control & DMASCR_TRANSFER_READ) ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (control & DMASCR_HARD_ERROR) { + /* error */ + bio->bi_status = BLK_STS_IOERR; + dev_printk(KERN_WARNING, &card->dev->dev, + "I/O error on sector %d/%d\n", + le32_to_cpu(desc->local_addr)>>9, + le32_to_cpu(desc->transfer_size)); + dump_dmastat(card, control); + } else if (op_is_write(bio_op(bio)) && + le32_to_cpu(desc->local_addr) >> 9 == + card->init_size) { + card->init_size += le32_to_cpu(desc->transfer_size) >> 9; + if (card->init_size >> 1 >= card->mm_size) { + dev_printk(KERN_INFO, &card->dev->dev, + "memory now initialised\n"); + set_userbit(card, MEMORY_INITIALIZED, 1); + } + } + if (bio != page->bio) { + bio->bi_next = return_bio; + return_bio = bio; + } + + if (last) + break; + } + + if (debug & DEBUG_LED_ON_TRANSFER) + set_led(card, LED_REMOVE, LED_OFF); + + if (card->check_batteries) { + card->check_batteries = 0; + check_batteries(card); + } + if (page->headcnt >= page->cnt) { + reset_page(page); + card->Active = -1; + activate(card); + } else { + /* haven't finished with this one yet */ + pr_debug("do some more\n"); + mm_start_io(card); + } + out_unlock: + spin_unlock(&card->lock); + + while (return_bio) { + struct bio *bio = return_bio; + + return_bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio); + } +} + +static void mm_unplug(struct blk_plug_cb *cb, bool from_schedule) +{ + struct cardinfo *card = cb->data; + + spin_lock_irq(&card->lock); + activate(card); + spin_unlock_irq(&card->lock); + kfree(cb); +} + +static int mm_check_plugged(struct cardinfo *card) +{ + return !!blk_check_plugged(mm_unplug, card, sizeof(struct blk_plug_cb)); +} + +static blk_qc_t mm_submit_bio(struct bio *bio) +{ + struct cardinfo *card = bio->bi_bdev->bd_disk->private_data; + + pr_debug("mm_make_request %llu %u\n", + (unsigned long long)bio->bi_iter.bi_sector, + bio->bi_iter.bi_size); + + blk_queue_split(&bio); + + spin_lock_irq(&card->lock); + *card->biotail = bio; + bio->bi_next = NULL; + card->biotail = &bio->bi_next; + if (op_is_sync(bio->bi_opf) || !mm_check_plugged(card)) + activate(card); + spin_unlock_irq(&card->lock); + + return BLK_QC_T_NONE; +} + +static irqreturn_t mm_interrupt(int irq, void *__card) +{ + struct cardinfo *card = (struct cardinfo *) __card; + unsigned int dma_status; + unsigned short cfg_status; + +HW_TRACE(0x30); + + dma_status = le32_to_cpu(readl(card->csr_remap + DMA_STATUS_CTRL)); + + if (!(dma_status & (DMASCR_ERROR_MASK | DMASCR_CHAIN_COMPLETE))) { + /* interrupt wasn't for me ... 
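(the handler is registered IRQF_SHARED, so returning IRQ_NONE passes the interrupt on to the next handler on the line)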
*/ + return IRQ_NONE; + } + + /* clear COMPLETION interrupts */ + if (card->flags & UM_FLAG_NO_BYTE_STATUS) + writel(cpu_to_le32(DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE), + card->csr_remap + DMA_STATUS_CTRL); + else + writeb((DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE) >> 16, + card->csr_remap + DMA_STATUS_CTRL + 2); + + /* log errors and clear interrupt status */ + if (dma_status & DMASCR_ANY_ERR) { + unsigned int data_log1, data_log2; + unsigned int addr_log1, addr_log2; + unsigned char stat, count, syndrome, check; + + stat = readb(card->csr_remap + MEMCTRLCMD_ERRSTATUS); + + data_log1 = le32_to_cpu(readl(card->csr_remap + + ERROR_DATA_LOG)); + data_log2 = le32_to_cpu(readl(card->csr_remap + + ERROR_DATA_LOG + 4)); + addr_log1 = le32_to_cpu(readl(card->csr_remap + + ERROR_ADDR_LOG)); + addr_log2 = readb(card->csr_remap + ERROR_ADDR_LOG + 4); + + count = readb(card->csr_remap + ERROR_COUNT); + syndrome = readb(card->csr_remap + ERROR_SYNDROME); + check = readb(card->csr_remap + ERROR_CHECK); + + dump_dmastat(card, dma_status); + + if (stat & 0x01) + dev_printk(KERN_ERR, &card->dev->dev, + "Memory access error detected (err count %d)\n", + count); + if (stat & 0x02) + dev_printk(KERN_ERR, &card->dev->dev, + "Multi-bit EDC error\n"); + + dev_printk(KERN_ERR, &card->dev->dev, + "Fault Address 0x%02x%08x, Fault Data 0x%08x%08x\n", + addr_log2, addr_log1, data_log2, data_log1); + dev_printk(KERN_ERR, &card->dev->dev, + "Fault Check 0x%02x, Fault Syndrome 0x%02x\n", + check, syndrome); + + writeb(0, card->csr_remap + ERROR_COUNT); + } + + if (dma_status & DMASCR_PARITY_ERR_REP) { + dev_printk(KERN_ERR, &card->dev->dev, + "PARITY ERROR REPORTED\n"); + pci_read_config_word(card->dev, PCI_STATUS, &cfg_status); + pci_write_config_word(card->dev, PCI_STATUS, cfg_status); + } + + if (dma_status & DMASCR_PARITY_ERR_DET) { + dev_printk(KERN_ERR, &card->dev->dev, + "PARITY ERROR DETECTED\n"); + pci_read_config_word(card->dev, PCI_STATUS, &cfg_status); + pci_write_config_word(card->dev, PCI_STATUS, cfg_status); + } + + if (dma_status & DMASCR_SYSTEM_ERR_SIG) { + dev_printk(KERN_ERR, &card->dev->dev, "SYSTEM ERROR\n"); + pci_read_config_word(card->dev, PCI_STATUS, &cfg_status); + pci_write_config_word(card->dev, PCI_STATUS, cfg_status); + } + + if (dma_status & DMASCR_TARGET_ABT) { + dev_printk(KERN_ERR, &card->dev->dev, "TARGET ABORT\n"); + pci_read_config_word(card->dev, PCI_STATUS, &cfg_status); + pci_write_config_word(card->dev, PCI_STATUS, cfg_status); + } + + if (dma_status & DMASCR_MASTER_ABT) { + dev_printk(KERN_ERR, &card->dev->dev, "MASTER ABORT\n"); + pci_read_config_word(card->dev, PCI_STATUS, &cfg_status); + pci_write_config_word(card->dev, PCI_STATUS, cfg_status); + } + + /* and process the DMA descriptors */ + card->dma_status = dma_status; + tasklet_schedule(&card->tasklet); + +HW_TRACE(0x36); + + return IRQ_HANDLED; +} + +/* + * If both batteries are good, no LED + * If either battery has been warned, solid LED + * If both batteries are bad, flash the LED quickly + * If either battery is bad, flash the LED semi quickly + */ +static void set_fault_to_battery_status(struct cardinfo *card) +{ + if (card->battery[0].good && card->battery[1].good) + set_led(card, LED_FAULT, LED_OFF); + else if (card->battery[0].warned || card->battery[1].warned) + set_led(card, LED_FAULT, LED_ON); + else if (!card->battery[0].good && !card->battery[1].good) + set_led(card, LED_FAULT, LED_FLASH_7_0); + else + set_led(card, LED_FAULT, LED_FLASH_3_5); +} + +static void init_battery_timer(void); + +static int 
check_battery(struct cardinfo *card, int battery, int status) +{ + if (status != card->battery[battery].good) { + card->battery[battery].good = !card->battery[battery].good; + card->battery[battery].last_change = jiffies; + + if (card->battery[battery].good) { + dev_printk(KERN_ERR, &card->dev->dev, + "Battery %d now good\n", battery + 1); + card->battery[battery].warned = 0; + } else + dev_printk(KERN_ERR, &card->dev->dev, + "Battery %d now FAILED\n", battery + 1); + + return 1; + } else if (!card->battery[battery].good && + !card->battery[battery].warned && + time_after_eq(jiffies, card->battery[battery].last_change + + (HZ * 60 * 60 * 5))) { + dev_printk(KERN_ERR, &card->dev->dev, + "Battery %d still FAILED after 5 hours\n", battery + 1); + card->battery[battery].warned = 1; + + return 1; + } + + return 0; +} + +static void check_batteries(struct cardinfo *card) +{ + /* NOTE: this must *never* be called while the card + * is doing (bus-to-card) DMA, or you will need the + * reset switch + */ + unsigned char status; + int ret1, ret2; + + status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY); + if (debug & DEBUG_BATTERY_POLLING) + dev_printk(KERN_DEBUG, &card->dev->dev, + "checking battery status, 1 = %s, 2 = %s\n", + (status & BATTERY_1_FAILURE) ? "FAILURE" : "OK", + (status & BATTERY_2_FAILURE) ? "FAILURE" : "OK"); + + ret1 = check_battery(card, 0, !(status & BATTERY_1_FAILURE)); + ret2 = check_battery(card, 1, !(status & BATTERY_2_FAILURE)); + + if (ret1 || ret2) + set_fault_to_battery_status(card); +} + +static void check_all_batteries(struct timer_list *unused) +{ + int i; + + for (i = 0; i < num_cards; i++) + if (!(cards[i].flags & UM_FLAG_NO_BATT)) { + struct cardinfo *card = &cards[i]; + spin_lock_bh(&card->lock); + if (card->Active >= 0) + card->check_batteries = 1; + else + check_batteries(card); + spin_unlock_bh(&card->lock); + } + + init_battery_timer(); +} + +static void init_battery_timer(void) +{ + timer_setup(&battery_timer, check_all_batteries, 0); + battery_timer.expires = jiffies + (HZ * 60); + add_timer(&battery_timer); +} + +static void del_battery_timer(void) +{ + del_timer(&battery_timer); +} + +/* + * Note no locks taken out here. In a worst case scenario, we could drop + * a chunk of system memory. But that should never happen, since validation + * happens at open or mount time, when locks are held. + * + * That's crap, since doing that while some partitions are opened + * or mounted will give you really nasty results. + */ +static int mm_revalidate(struct gendisk *disk) +{ + struct cardinfo *card = disk->private_data; + set_capacity(disk, card->mm_size << 1); + return 0; +} + +static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct cardinfo *card = bdev->bd_disk->private_data; + int size = card->mm_size * (1024 / MM_HARDSECT); + + /* + * get geometry: we have to fake one... trim the size to a + * multiple of 2048 (1M): tell we have 32 sectors, 64 heads, + * whatever cylinders. 
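+	 * For example, a 512 MB card has mm_size = 524288 KB, so
+	 * size = 1048576 sectors and cylinders = 1048576 / (64 * 32) = 512.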
+ */ + geo->heads = 64; + geo->sectors = 32; + geo->cylinders = size / (geo->heads * geo->sectors); + return 0; +} + +static const struct block_device_operations mm_fops = { + .owner = THIS_MODULE, + .submit_bio = mm_submit_bio, + .getgeo = mm_getgeo, + .revalidate_disk = mm_revalidate, +}; + +static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int ret; + struct cardinfo *card = &cards[num_cards]; + unsigned char mem_present; + unsigned char batt_status; + unsigned int saved_bar, data; + unsigned long csr_base; + unsigned long csr_len; + int magic_number; + static int printed_version; + + if (!printed_version++) + printk(KERN_INFO DRIVER_VERSION " : " DRIVER_DESC "\n"); + + ret = pci_enable_device(dev); + if (ret) + return ret; + + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF8); + pci_set_master(dev); + + card->dev = dev; + + csr_base = pci_resource_start(dev, 0); + csr_len = pci_resource_len(dev, 0); + if (!csr_base || !csr_len) + return -ENODEV; + + dev_printk(KERN_INFO, &dev->dev, + "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n"); + + if (dma_set_mask(&dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) { + dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n"); + return -ENOMEM; + } + + ret = pci_request_regions(dev, DRIVER_NAME); + if (ret) { + dev_printk(KERN_ERR, &card->dev->dev, + "Unable to request memory region\n"); + goto failed_req_csr; + } + + card->csr_remap = ioremap(csr_base, csr_len); + if (!card->csr_remap) { + dev_printk(KERN_ERR, &card->dev->dev, + "Unable to remap memory region\n"); + ret = -ENOMEM; + + goto failed_remap_csr; + } + + dev_printk(KERN_INFO, &card->dev->dev, + "CSR 0x%08lx -> 0x%p (0x%lx)\n", + csr_base, card->csr_remap, csr_len); + + switch (card->dev->device) { + case 0x5415: + card->flags |= UM_FLAG_NO_BYTE_STATUS | UM_FLAG_NO_BATTREG; + magic_number = 0x59; + break; + + case 0x5425: + card->flags |= UM_FLAG_NO_BYTE_STATUS; + magic_number = 0x5C; + break; + + case 0x6155: + card->flags |= UM_FLAG_NO_BYTE_STATUS | + UM_FLAG_NO_BATTREG | UM_FLAG_NO_BATT; + magic_number = 0x99; + break; + + default: + magic_number = 0x100; + break; + } + + if (readb(card->csr_remap + MEMCTRLSTATUS_MAGIC) != magic_number) { + dev_printk(KERN_ERR, &card->dev->dev, "Magic number invalid\n"); + ret = -ENOMEM; + goto failed_magic; + } + + card->mm_pages[0].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[0].page_dma, GFP_KERNEL); + card->mm_pages[1].desc = dma_alloc_coherent(&card->dev->dev, + PAGE_SIZE * 2, &card->mm_pages[1].page_dma, GFP_KERNEL); + if (card->mm_pages[0].desc == NULL || + card->mm_pages[1].desc == NULL) { + dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n"); + ret = -ENOMEM; + goto failed_alloc; + } + reset_page(&card->mm_pages[0]); + reset_page(&card->mm_pages[1]); + card->Ready = 0; /* page 0 is ready */ + card->Active = -1; /* no page is active */ + card->bio = NULL; + card->biotail = &card->bio; + spin_lock_init(&card->lock); + + card->queue = blk_alloc_queue(NUMA_NO_NODE); + if (!card->queue) { + ret = -ENOMEM; + goto failed_alloc; + } + + tasklet_init(&card->tasklet, process_page, (unsigned long)card); + + card->check_batteries = 0; + + mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY); + switch (mem_present) { + case MEM_128_MB: + card->mm_size = 1024 * 128; + break; + case MEM_256_MB: + card->mm_size = 1024 * 256; + break; + case MEM_512_MB: + card->mm_size = 1024 * 512; + break; + case MEM_1_GB: + card->mm_size = 1024 
* 1024; + break; + case MEM_2_GB: + card->mm_size = 1024 * 2048; + break; + default: + card->mm_size = 0; + break; + } + + /* Clear the LED's we control */ + set_led(card, LED_REMOVE, LED_OFF); + set_led(card, LED_FAULT, LED_OFF); + + batt_status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY); + + card->battery[0].good = !(batt_status & BATTERY_1_FAILURE); + card->battery[1].good = !(batt_status & BATTERY_2_FAILURE); + card->battery[0].last_change = card->battery[1].last_change = jiffies; + + if (card->flags & UM_FLAG_NO_BATT) + dev_printk(KERN_INFO, &card->dev->dev, + "Size %d KB\n", card->mm_size); + else { + dev_printk(KERN_INFO, &card->dev->dev, + "Size %d KB, Battery 1 %s (%s), Battery 2 %s (%s)\n", + card->mm_size, + batt_status & BATTERY_1_DISABLED ? "Disabled" : "Enabled", + card->battery[0].good ? "OK" : "FAILURE", + batt_status & BATTERY_2_DISABLED ? "Disabled" : "Enabled", + card->battery[1].good ? "OK" : "FAILURE"); + + set_fault_to_battery_status(card); + } + + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &saved_bar); + data = 0xffffffff; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, data); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &data); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, saved_bar); + data &= 0xfffffff0; + data = ~data; + data += 1; + + if (request_irq(dev->irq, mm_interrupt, IRQF_SHARED, DRIVER_NAME, + card)) { + dev_printk(KERN_ERR, &card->dev->dev, + "Unable to allocate IRQ\n"); + ret = -ENODEV; + goto failed_req_irq; + } + + dev_printk(KERN_INFO, &card->dev->dev, + "Window size %d bytes, IRQ %d\n", data, dev->irq); + + pci_set_drvdata(dev, card); + + if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ + pci_write_cmd = 0x07; /* then Memory Write command */ + + if (pci_write_cmd & 0x08) { /* use Memory Write and Invalidate */ + unsigned short cfg_command; + pci_read_config_word(dev, PCI_COMMAND, &cfg_command); + cfg_command |= 0x10; /* Memory Write & Invalidate Enable */ + pci_write_config_word(dev, PCI_COMMAND, cfg_command); + } + pci_cmds = (pci_read_cmd << 28) | (pci_write_cmd << 24); + + num_cards++; + + if (!get_userbit(card, MEMORY_INITIALIZED)) { + dev_printk(KERN_INFO, &card->dev->dev, + "memory NOT initialized. 
Consider over-writing whole device.\n"); + card->init_size = 0; + } else { + dev_printk(KERN_INFO, &card->dev->dev, + "memory already initialized\n"); + card->init_size = card->mm_size; + } + + /* Enable ECC */ + writeb(EDC_STORE_CORRECT, card->csr_remap + MEMCTRLCMD_ERRCTRL); + + return 0; + + failed_req_irq: + failed_alloc: + if (card->mm_pages[0].desc) + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[0].desc, + card->mm_pages[0].page_dma); + if (card->mm_pages[1].desc) + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[1].desc, + card->mm_pages[1].page_dma); + failed_magic: + iounmap(card->csr_remap); + failed_remap_csr: + pci_release_regions(dev); + failed_req_csr: + + return ret; +} + +static void mm_pci_remove(struct pci_dev *dev) +{ + struct cardinfo *card = pci_get_drvdata(dev); + + tasklet_kill(&card->tasklet); + free_irq(dev->irq, card); + iounmap(card->csr_remap); + + if (card->mm_pages[0].desc) + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[0].desc, + card->mm_pages[0].page_dma); + if (card->mm_pages[1].desc) + dma_free_coherent(&card->dev->dev, PAGE_SIZE * 2, + card->mm_pages[1].desc, + card->mm_pages[1].page_dma); + blk_cleanup_queue(card->queue); + + pci_release_regions(dev); + pci_disable_device(dev); +} + +static const struct pci_device_id mm_pci_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5415CN)}, + {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5425CN)}, + {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_6155)}, + { + .vendor = 0x8086, + .device = 0xB555, + .subvendor = 0x1332, + .subdevice = 0x5460, + .class = 0x050000, + .class_mask = 0, + }, { /* end: all zeroes */ } +}; + +MODULE_DEVICE_TABLE(pci, mm_pci_ids); + +static struct pci_driver mm_pci_driver = { + .name = DRIVER_NAME, + .id_table = mm_pci_ids, + .probe = mm_pci_probe, + .remove = mm_pci_remove, +}; + +static int __init mm_init(void) +{ + int retval, i; + int err; + + retval = pci_register_driver(&mm_pci_driver); + if (retval) + return -ENOMEM; + + err = major_nr = register_blkdev(0, DRIVER_NAME); + if (err < 0) { + pci_unregister_driver(&mm_pci_driver); + return -EIO; + } + + for (i = 0; i < num_cards; i++) { + mm_gendisk[i] = alloc_disk(1 << MM_SHIFT); + if (!mm_gendisk[i]) + goto out; + } + + for (i = 0; i < num_cards; i++) { + struct gendisk *disk = mm_gendisk[i]; + sprintf(disk->disk_name, "umem%c", 'a'+i); + spin_lock_init(&cards[i].lock); + disk->major = major_nr; + disk->first_minor = i << MM_SHIFT; + disk->fops = &mm_fops; + disk->private_data = &cards[i]; + disk->queue = cards[i].queue; + set_capacity(disk, cards[i].mm_size << 1); + add_disk(disk); + } + + init_battery_timer(); + printk(KERN_INFO "MM: desc_per_page = %ld\n", DESC_PER_PAGE); +/* printk("mm_init: Done. 
10-19-01 9:00\n"); */ + return 0; + +out: + pci_unregister_driver(&mm_pci_driver); + unregister_blkdev(major_nr, DRIVER_NAME); + while (i--) + put_disk(mm_gendisk[i]); + return -ENOMEM; +} + +static void __exit mm_cleanup(void) +{ + int i; + + del_battery_timer(); + + for (i = 0; i < num_cards ; i++) { + del_gendisk(mm_gendisk[i]); + put_disk(mm_gendisk[i]); + } + + pci_unregister_driver(&mm_pci_driver); + + unregister_blkdev(major_nr, DRIVER_NAME); +} + +module_init(mm_init); +module_exit(mm_cleanup); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); diff --git a/drivers/block/umem.h b/drivers/block/umem.h new file mode 100644 index 0000000000000..58384978ff054 --- /dev/null +++ b/drivers/block/umem.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * This file contains defines for the + * Micro Memory MM5415 + * family PCI Memory Module with Battery Backup. + * + * Copyright Micro Memory INC 2001. All rights reserved. + */ + +#ifndef _DRIVERS_BLOCK_MM_H +#define _DRIVERS_BLOCK_MM_H + + +#define IRQ_TIMEOUT (1 * HZ) + +/* CSR register definition */ +#define MEMCTRLSTATUS_MAGIC 0x00 +#define MM_MAGIC_VALUE (unsigned char)0x59 + +#define MEMCTRLSTATUS_BATTERY 0x04 +#define BATTERY_1_DISABLED 0x01 +#define BATTERY_1_FAILURE 0x02 +#define BATTERY_2_DISABLED 0x04 +#define BATTERY_2_FAILURE 0x08 + +#define MEMCTRLSTATUS_MEMORY 0x07 +#define MEM_128_MB 0xfe +#define MEM_256_MB 0xfc +#define MEM_512_MB 0xf8 +#define MEM_1_GB 0xf0 +#define MEM_2_GB 0xe0 + +#define MEMCTRLCMD_LEDCTRL 0x08 +#define LED_REMOVE 2 +#define LED_FAULT 4 +#define LED_POWER 6 +#define LED_FLIP 255 +#define LED_OFF 0x00 +#define LED_ON 0x01 +#define LED_FLASH_3_5 0x02 +#define LED_FLASH_7_0 0x03 +#define LED_POWER_ON 0x00 +#define LED_POWER_OFF 0x01 +#define USER_BIT1 0x01 +#define USER_BIT2 0x02 + +#define MEMORY_INITIALIZED USER_BIT1 + +#define MEMCTRLCMD_ERRCTRL 0x0C +#define EDC_NONE_DEFAULT 0x00 +#define EDC_NONE 0x01 +#define EDC_STORE_READ 0x02 +#define EDC_STORE_CORRECT 0x03 + +#define MEMCTRLCMD_ERRCNT 0x0D +#define MEMCTRLCMD_ERRSTATUS 0x0E + +#define ERROR_DATA_LOG 0x20 +#define ERROR_ADDR_LOG 0x28 +#define ERROR_COUNT 0x3D +#define ERROR_SYNDROME 0x3E +#define ERROR_CHECK 0x3F + +#define DMA_PCI_ADDR 0x40 +#define DMA_LOCAL_ADDR 0x48 +#define DMA_TRANSFER_SIZE 0x50 +#define DMA_DESCRIPTOR_ADDR 0x58 +#define DMA_SEMAPHORE_ADDR 0x60 +#define DMA_STATUS_CTRL 0x68 +#define DMASCR_GO 0x00001 +#define DMASCR_TRANSFER_READ 0x00002 +#define DMASCR_CHAIN_EN 0x00004 +#define DMASCR_SEM_EN 0x00010 +#define DMASCR_DMA_COMP_EN 0x00020 +#define DMASCR_CHAIN_COMP_EN 0x00040 +#define DMASCR_ERR_INT_EN 0x00080 +#define DMASCR_PARITY_INT_EN 0x00100 +#define DMASCR_ANY_ERR 0x00800 +#define DMASCR_MBE_ERR 0x01000 +#define DMASCR_PARITY_ERR_REP 0x02000 +#define DMASCR_PARITY_ERR_DET 0x04000 +#define DMASCR_SYSTEM_ERR_SIG 0x08000 +#define DMASCR_TARGET_ABT 0x10000 +#define DMASCR_MASTER_ABT 0x20000 +#define DMASCR_DMA_COMPLETE 0x40000 +#define DMASCR_CHAIN_COMPLETE 0x80000 + +/* +3.SOME PCs HAVE HOST BRIDGES WHICH APPARENTLY DO NOT CORRECTLY HANDLE +READ-LINE (0xE) OR READ-MULTIPLE (0xC) PCI COMMAND CODES DURING DMA +TRANSFERS. IN OTHER SYSTEMS THESE COMMAND CODES WILL CAUSE THE HOST BRIDGE +TO ALLOW LONGER BURSTS DURING DMA READ OPERATIONS. THE UPPER FOUR BITS +(31..28) OF THE DMA CSR HAVE BEEN MADE PROGRAMMABLE, SO THAT EITHER A 0x6, +AN 0xE OR A 0xC CAN BE WRITTEN TO THEM TO SET THE COMMAND CODE USED DURING +DMA READ OPERATIONS. 
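+
+For example, loading the module with pci_read_cmd=0x6 selects the plain
+Memory Read command; mm_pci_probe() then programs it into bits 31..28 of
+the DMA CSR through pci_cmds = (pci_read_cmd << 28) | (pci_write_cmd << 24).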
+*/
+#define DMASCR_READ	   0x60000000
+#define DMASCR_READLINE    0xE0000000
+#define DMASCR_READMULTI   0xC0000000
+
+
+#define DMASCR_ERROR_MASK (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR | DMASCR_ANY_ERR)
+#define DMASCR_HARD_ERROR (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR)
+
+#define WINDOWMAP_WINNUM	0x7B
+
+#define DMA_READ_FROM_HOST	0
+#define DMA_WRITE_TO_HOST	1
+
+struct mm_dma_desc {
+	__le64	pci_addr;
+	__le64	local_addr;
+	__le32	transfer_size;
+	u32	zero1;
+	__le64	next_desc_addr;
+	__le64	sem_addr;
+	__le32	control_bits;
+	u32	zero2;
+
+	dma_addr_t data_dma_handle;
+
+	/* Copy of the bits */
+	__le64	sem_control_bits;
+} __attribute__((aligned(8)));
+
+/* bits for card->flags */
+#define UM_FLAG_DMA_IN_REGS		1
+#define UM_FLAG_NO_BYTE_STATUS		2
+#define UM_FLAG_NO_BATTREG		4
+#define UM_FLAG_NO_BATT			8
+#endif
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
new file mode 100644
index 0000000000000..eb8ef65778c35
--- /dev/null
+++ b/drivers/block/xsysace.c
@@ -0,0 +1,1273 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Xilinx SystemACE device driver
+ *
+ * Copyright 2007 Secret Lab Technologies Ltd.
+ */
+
+/*
+ * The SystemACE chip is designed to configure FPGAs by loading an FPGA
+ * bitstream from a file on a CF card and squirting it into FPGAs connected
+ * to the SystemACE JTAG chain.  It also has the advantage of providing an
+ * MPU interface which can be used to control the FPGA configuration process
+ * and to use the attached CF card for general purpose storage.
+ *
+ * This driver is a block device driver for the SystemACE.
+ *
+ * Initialization:
+ *    The driver registers itself as a platform_device driver at module
+ *    load time.  The platform bus will take care of calling the
+ *    ace_probe() method for all SystemACE instances in the system.  Any
+ *    number of SystemACE instances are supported.  ace_probe() calls
+ *    ace_setup() which initializes all data structures, reads the CF
+ *    id structure and registers the device.
+ *
+ * Processing:
+ *    Just about all of the heavy lifting in this driver is performed by
+ *    a Finite State Machine (FSM).  The driver needs to wait on a number
+ *    of events; some raised by interrupts, some which need to be polled
+ *    for.  Describing all of the behaviour in a FSM seems to be the
+ *    easiest way to keep the complexity low and make it easy to
+ *    understand what the driver is doing.  If the block ops or the
+ *    request function need to interact with the hardware, then they
+ *    simply need to flag the request and kick off FSM processing.
+ *
+ *    The FSM itself is atomic-safe code which can be run from any
+ *    context.  The general process flow is:
+ *    1. obtain the ace->lock spinlock.
+ *    2. loop on ace_fsm_dostate() until the ace->fsm_continue flag is
+ *       cleared.
+ *    3. release the lock.
+ *
+ *    Individual states do not sleep in any way.  If a condition needs to
+ *    be waited for then the state must clear the fsm_continue flag and
+ *    either schedule the FSM to be run again at a later time, or expect
+ *    an interrupt to call the FSM when the desired condition is met.
+ *
+ *    In normal operation, the FSM is processed at interrupt context
+ *    either when the driver's tasklet is scheduled, or when an irq is
+ *    raised by the hardware.  The tasklet can be scheduled at any time.
+ *    The request method in particular schedules the tasklet when a new
+ *    request has been indicated by the block layer.
+ *    Once started, the FSM proceeds as far as it can processing the
+ *    request until it needs to wait on a hardware event.  At this
+ *    point, it must yield execution.
+ *
+ *    A state has two options when yielding execution:
+ *    1. ace_fsm_yield()
+ *       - Call if need to poll for event.
+ *       - clears the fsm_continue flag to exit the processing loop
+ *       - reschedules the tasklet to run again as soon as possible
+ *    2. ace_fsm_yieldirq()
+ *       - Call if an irq is expected from the HW
+ *       - clears the fsm_continue flag to exit the processing loop
+ *       - does not reschedule the tasklet so the FSM will not be processed
+ *         again until an irq is received.
+ *    After calling a yield function, the state must return control back
+ *    to the FSM main loop.
+ *
+ *    Additionally, the driver maintains a kernel timer which can process
+ *    the FSM.  If the FSM gets stalled, typically due to a missed
+ *    interrupt, then the kernel timer will expire and the driver can
+ *    continue where it left off.
+ *
+ * To Do:
+ *    - Add FPGA configuration control interface.
+ *    - Request major number from lanana
+ */
+
+#undef DEBUG
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/blk-mq.h>
+#include <linux/mutex.h>
+#include <linux/ata.h>
+#include <linux/hdreg.h>
+#include <linux/platform_device.h>
+#if defined(CONFIG_OF)
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#endif
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Xilinx SystemACE device driver");
+MODULE_LICENSE("GPL");
+
+/* SystemACE register definitions */
+#define ACE_BUSMODE (0x00)
+
+#define ACE_STATUS (0x04)
+#define ACE_STATUS_CFGLOCK (0x00000001)
+#define ACE_STATUS_MPULOCK (0x00000002)
+#define ACE_STATUS_CFGERROR (0x00000004)	/* config controller error */
+#define ACE_STATUS_CFCERROR (0x00000008)	/* CF controller error */
+#define ACE_STATUS_CFDETECT (0x00000010)
+#define ACE_STATUS_DATABUFRDY (0x00000020)
+#define ACE_STATUS_DATABUFMODE (0x00000040)
+#define ACE_STATUS_CFGDONE (0x00000080)
+#define ACE_STATUS_RDYFORCFCMD (0x00000100)
+#define ACE_STATUS_CFGMODEPIN (0x00000200)
+#define ACE_STATUS_CFGADDR_MASK (0x0000e000)
+#define ACE_STATUS_CFBSY (0x00020000)
+#define ACE_STATUS_CFRDY (0x00040000)
+#define ACE_STATUS_CFDWF (0x00080000)
+#define ACE_STATUS_CFDSC (0x00100000)
+#define ACE_STATUS_CFDRQ (0x00200000)
+#define ACE_STATUS_CFCORR (0x00400000)
+#define ACE_STATUS_CFERR (0x00800000)
+
+#define ACE_ERROR (0x08)
+#define ACE_CFGLBA (0x0c)
+#define ACE_MPULBA (0x10)
+
+#define ACE_SECCNTCMD (0x14)
+#define ACE_SECCNTCMD_RESET (0x0100)
+#define ACE_SECCNTCMD_IDENTIFY (0x0200)
+#define ACE_SECCNTCMD_READ_DATA (0x0300)
+#define ACE_SECCNTCMD_WRITE_DATA (0x0400)
+#define ACE_SECCNTCMD_ABORT (0x0600)
+
+#define ACE_VERSION (0x16)
+#define ACE_VERSION_REVISION_MASK (0x00FF)
+#define ACE_VERSION_MINOR_MASK (0x0F00)
+#define ACE_VERSION_MAJOR_MASK (0xF000)
+
+#define ACE_CTRL (0x18)
+#define ACE_CTRL_FORCELOCKREQ (0x0001)
+#define ACE_CTRL_LOCKREQ (0x0002)
+#define ACE_CTRL_FORCECFGADDR (0x0004)
+#define ACE_CTRL_FORCECFGMODE (0x0008)
+#define ACE_CTRL_CFGMODE (0x0010)
+#define ACE_CTRL_CFGSTART (0x0020)
+#define ACE_CTRL_CFGSEL (0x0040)
+#define ACE_CTRL_CFGRESET (0x0080)
+#define ACE_CTRL_DATABUFRDYIRQ (0x0100)
+#define ACE_CTRL_ERRORIRQ (0x0200)
+#define ACE_CTRL_CFGDONEIRQ (0x0400)
+#define ACE_CTRL_RESETIRQ (0x0800)
+#define ACE_CTRL_CFGPROG (0x1000)
+#define ACE_CTRL_CFGADDR_MASK (0xe000)
+
+#define ACE_FATSTAT (0x1c)
+
+#define ACE_NUM_MINORS 16
+#define ACE_SECTOR_SIZE (512)
+#define ACE_FIFO_SIZE (32)
+#define ACE_BUF_PER_SECTOR (ACE_SECTOR_SIZE / ACE_FIFO_SIZE)
+
+#define ACE_BUS_WIDTH_8 0
+#define 
ACE_BUS_WIDTH_16 1 + +struct ace_reg_ops; + +struct ace_device { + /* driver state data */ + int id; + int media_change; + int users; + struct list_head list; + + /* finite state machine data */ + struct tasklet_struct fsm_tasklet; + uint fsm_task; /* Current activity (ACE_TASK_*) */ + uint fsm_state; /* Current state (ACE_FSM_STATE_*) */ + uint fsm_continue_flag; /* cleared to exit FSM mainloop */ + uint fsm_iter_num; + struct timer_list stall_timer; + + /* Transfer state/result, use for both id and block request */ + struct request *req; /* request being processed */ + void *data_ptr; /* pointer to I/O buffer */ + int data_count; /* number of buffers remaining */ + int data_result; /* Result of transfer; 0 := success */ + + int id_req_count; /* count of id requests */ + int id_result; + struct completion id_completion; /* used when id req finishes */ + int in_irq; + + /* Details of hardware device */ + resource_size_t physaddr; + void __iomem *baseaddr; + int irq; + int bus_width; /* 0 := 8 bit; 1 := 16 bit */ + struct ace_reg_ops *reg_ops; + int lock_count; + + /* Block device data structures */ + spinlock_t lock; + struct device *dev; + struct request_queue *queue; + struct gendisk *gd; + struct blk_mq_tag_set tag_set; + struct list_head rq_list; + + /* Inserted CF card parameters */ + u16 cf_id[ATA_ID_WORDS]; +}; + +static DEFINE_MUTEX(xsysace_mutex); +static int ace_major; + +/* --------------------------------------------------------------------- + * Low level register access + */ + +struct ace_reg_ops { + u16(*in) (struct ace_device * ace, int reg); + void (*out) (struct ace_device * ace, int reg, u16 val); + void (*datain) (struct ace_device * ace); + void (*dataout) (struct ace_device * ace); +}; + +/* 8 Bit bus width */ +static u16 ace_in_8(struct ace_device *ace, int reg) +{ + void __iomem *r = ace->baseaddr + reg; + return in_8(r) | (in_8(r + 1) << 8); +} + +static void ace_out_8(struct ace_device *ace, int reg, u16 val) +{ + void __iomem *r = ace->baseaddr + reg; + out_8(r, val); + out_8(r + 1, val >> 8); +} + +static void ace_datain_8(struct ace_device *ace) +{ + void __iomem *r = ace->baseaddr + 0x40; + u8 *dst = ace->data_ptr; + int i = ACE_FIFO_SIZE; + while (i--) + *dst++ = in_8(r++); + ace->data_ptr = dst; +} + +static void ace_dataout_8(struct ace_device *ace) +{ + void __iomem *r = ace->baseaddr + 0x40; + u8 *src = ace->data_ptr; + int i = ACE_FIFO_SIZE; + while (i--) + out_8(r++, *src++); + ace->data_ptr = src; +} + +static struct ace_reg_ops ace_reg_8_ops = { + .in = ace_in_8, + .out = ace_out_8, + .datain = ace_datain_8, + .dataout = ace_dataout_8, +}; + +/* 16 bit big endian bus attachment */ +static u16 ace_in_be16(struct ace_device *ace, int reg) +{ + return in_be16(ace->baseaddr + reg); +} + +static void ace_out_be16(struct ace_device *ace, int reg, u16 val) +{ + out_be16(ace->baseaddr + reg, val); +} + +static void ace_datain_be16(struct ace_device *ace) +{ + int i = ACE_FIFO_SIZE / 2; + u16 *dst = ace->data_ptr; + while (i--) + *dst++ = in_le16(ace->baseaddr + 0x40); + ace->data_ptr = dst; +} + +static void ace_dataout_be16(struct ace_device *ace) +{ + int i = ACE_FIFO_SIZE / 2; + u16 *src = ace->data_ptr; + while (i--) + out_le16(ace->baseaddr + 0x40, *src++); + ace->data_ptr = src; +} + +/* 16 bit little endian bus attachment */ +static u16 ace_in_le16(struct ace_device *ace, int reg) +{ + return in_le16(ace->baseaddr + reg); +} + +static void ace_out_le16(struct ace_device *ace, int reg, u16 val) +{ + out_le16(ace->baseaddr + reg, val); +} + +static void 
ace_datain_le16(struct ace_device *ace) +{ + int i = ACE_FIFO_SIZE / 2; + u16 *dst = ace->data_ptr; + while (i--) + *dst++ = in_be16(ace->baseaddr + 0x40); + ace->data_ptr = dst; +} + +static void ace_dataout_le16(struct ace_device *ace) +{ + int i = ACE_FIFO_SIZE / 2; + u16 *src = ace->data_ptr; + while (i--) + out_be16(ace->baseaddr + 0x40, *src++); + ace->data_ptr = src; +} + +static struct ace_reg_ops ace_reg_be16_ops = { + .in = ace_in_be16, + .out = ace_out_be16, + .datain = ace_datain_be16, + .dataout = ace_dataout_be16, +}; + +static struct ace_reg_ops ace_reg_le16_ops = { + .in = ace_in_le16, + .out = ace_out_le16, + .datain = ace_datain_le16, + .dataout = ace_dataout_le16, +}; + +static inline u16 ace_in(struct ace_device *ace, int reg) +{ + return ace->reg_ops->in(ace, reg); +} + +static inline u32 ace_in32(struct ace_device *ace, int reg) +{ + return ace_in(ace, reg) | (ace_in(ace, reg + 2) << 16); +} + +static inline void ace_out(struct ace_device *ace, int reg, u16 val) +{ + ace->reg_ops->out(ace, reg, val); +} + +static inline void ace_out32(struct ace_device *ace, int reg, u32 val) +{ + ace_out(ace, reg, val); + ace_out(ace, reg + 2, val >> 16); +} + +/* --------------------------------------------------------------------- + * Debug support functions + */ + +#if defined(DEBUG) +static void ace_dump_mem(void *base, int len) +{ + const char *ptr = base; + int i, j; + + for (i = 0; i < len; i += 16) { + printk(KERN_INFO "%.8x:", i); + for (j = 0; j < 16; j++) { + if (!(j % 4)) + printk(" "); + printk("%.2x", ptr[i + j]); + } + printk(" "); + for (j = 0; j < 16; j++) + printk("%c", isprint(ptr[i + j]) ? ptr[i + j] : '.'); + printk("\n"); + } +} +#else +static inline void ace_dump_mem(void *base, int len) +{ +} +#endif + +static void ace_dump_regs(struct ace_device *ace) +{ + dev_info(ace->dev, + " ctrl: %.8x seccnt/cmd: %.4x ver:%.4x\n" + " status:%.8x mpu_lba:%.8x busmode:%4x\n" + " error: %.8x cfg_lba:%.8x fatstat:%.4x\n", + ace_in32(ace, ACE_CTRL), + ace_in(ace, ACE_SECCNTCMD), + ace_in(ace, ACE_VERSION), + ace_in32(ace, ACE_STATUS), + ace_in32(ace, ACE_MPULBA), + ace_in(ace, ACE_BUSMODE), + ace_in32(ace, ACE_ERROR), + ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT)); +} + +static void ace_fix_driveid(u16 *id) +{ +#if defined(__BIG_ENDIAN) + int i; + + /* All half words have wrong byte order; swap the bytes */ + for (i = 0; i < ATA_ID_WORDS; i++, id++) + *id = le16_to_cpu(*id); +#endif +} + +/* --------------------------------------------------------------------- + * Finite State Machine (FSM) implementation + */ + +/* FSM tasks; used to direct state transitions */ +#define ACE_TASK_IDLE 0 +#define ACE_TASK_IDENTIFY 1 +#define ACE_TASK_READ 2 +#define ACE_TASK_WRITE 3 +#define ACE_FSM_NUM_TASKS 4 + +/* FSM state definitions */ +#define ACE_FSM_STATE_IDLE 0 +#define ACE_FSM_STATE_REQ_LOCK 1 +#define ACE_FSM_STATE_WAIT_LOCK 2 +#define ACE_FSM_STATE_WAIT_CFREADY 3 +#define ACE_FSM_STATE_IDENTIFY_PREPARE 4 +#define ACE_FSM_STATE_IDENTIFY_TRANSFER 5 +#define ACE_FSM_STATE_IDENTIFY_COMPLETE 6 +#define ACE_FSM_STATE_REQ_PREPARE 7 +#define ACE_FSM_STATE_REQ_TRANSFER 8 +#define ACE_FSM_STATE_REQ_COMPLETE 9 +#define ACE_FSM_STATE_ERROR 10 +#define ACE_FSM_NUM_STATES 11 + +/* Set flag to exit FSM loop and reschedule tasklet */ +static inline void ace_fsm_yieldpoll(struct ace_device *ace) +{ + tasklet_schedule(&ace->fsm_tasklet); + ace->fsm_continue_flag = 0; +} + +static inline void ace_fsm_yield(struct ace_device *ace) +{ + dev_dbg(ace->dev, "%s()\n", __func__); + 
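+	/* poll for the event: reschedule the tasklet immediately */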
ace_fsm_yieldpoll(ace); +} + +/* Set flag to exit FSM loop and wait for IRQ to reschedule tasklet */ +static inline void ace_fsm_yieldirq(struct ace_device *ace) +{ + dev_dbg(ace->dev, "ace_fsm_yieldirq()\n"); + + if (ace->irq > 0) + ace->fsm_continue_flag = 0; + else + ace_fsm_yieldpoll(ace); +} + +static bool ace_has_next_request(struct request_queue *q) +{ + struct ace_device *ace = q->queuedata; + + return !list_empty(&ace->rq_list); +} + +/* Get the next read/write request; ending requests that we don't handle */ +static struct request *ace_get_next_request(struct request_queue *q) +{ + struct ace_device *ace = q->queuedata; + struct request *rq; + + rq = list_first_entry_or_null(&ace->rq_list, struct request, queuelist); + if (rq) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + } + + return rq; +} + +static void ace_fsm_dostate(struct ace_device *ace) +{ + struct request *req; + u32 status; + u16 val; + int count; + +#if defined(DEBUG) + dev_dbg(ace->dev, "fsm_state=%i, id_req_count=%i\n", + ace->fsm_state, ace->id_req_count); +#endif + + /* Verify that there is actually a CF in the slot. If not, then + * bail out back to the idle state and wake up all the waiters */ + status = ace_in32(ace, ACE_STATUS); + if ((status & ACE_STATUS_CFDETECT) == 0) { + ace->fsm_state = ACE_FSM_STATE_IDLE; + ace->media_change = 1; + set_capacity(ace->gd, 0); + dev_info(ace->dev, "No CF in slot\n"); + + /* Drop all in-flight and pending requests */ + if (ace->req) { + blk_mq_end_request(ace->req, BLK_STS_IOERR); + ace->req = NULL; + } + while ((req = ace_get_next_request(ace->queue)) != NULL) + blk_mq_end_request(req, BLK_STS_IOERR); + + /* Drop back to IDLE state and notify waiters */ + ace->fsm_state = ACE_FSM_STATE_IDLE; + ace->id_result = -EIO; + while (ace->id_req_count) { + complete(&ace->id_completion); + ace->id_req_count--; + } + } + + switch (ace->fsm_state) { + case ACE_FSM_STATE_IDLE: + /* See if there is anything to do */ + if (ace->id_req_count || ace_has_next_request(ace->queue)) { + ace->fsm_iter_num++; + ace->fsm_state = ACE_FSM_STATE_REQ_LOCK; + mod_timer(&ace->stall_timer, jiffies + HZ); + if (!timer_pending(&ace->stall_timer)) + add_timer(&ace->stall_timer); + break; + } + del_timer(&ace->stall_timer); + ace->fsm_continue_flag = 0; + break; + + case ACE_FSM_STATE_REQ_LOCK: + if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) { + /* Already have the lock, jump to next state */ + ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY; + break; + } + + /* Request the lock */ + val = ace_in(ace, ACE_CTRL); + ace_out(ace, ACE_CTRL, val | ACE_CTRL_LOCKREQ); + ace->fsm_state = ACE_FSM_STATE_WAIT_LOCK; + break; + + case ACE_FSM_STATE_WAIT_LOCK: + if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) { + /* got the lock; move to next state */ + ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY; + break; + } + + /* wait a bit for the lock */ + ace_fsm_yield(ace); + break; + + case ACE_FSM_STATE_WAIT_CFREADY: + status = ace_in32(ace, ACE_STATUS); + if (!(status & ACE_STATUS_RDYFORCFCMD) || + (status & ACE_STATUS_CFBSY)) { + /* CF card isn't ready; it needs to be polled */ + ace_fsm_yield(ace); + break; + } + + /* Device is ready for command; determine what to do next */ + if (ace->id_req_count) + ace->fsm_state = ACE_FSM_STATE_IDENTIFY_PREPARE; + else + ace->fsm_state = ACE_FSM_STATE_REQ_PREPARE; + break; + + case ACE_FSM_STATE_IDENTIFY_PREPARE: + /* Send identify command */ + ace->fsm_task = ACE_TASK_IDENTIFY; + ace->data_ptr = ace->cf_id; + ace->data_count = ACE_BUF_PER_SECTOR; + ace_out(ace, 
ACE_SECCNTCMD, ACE_SECCNTCMD_IDENTIFY); + + /* As per datasheet, put config controller in reset */ + val = ace_in(ace, ACE_CTRL); + ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET); + + /* irq handler takes over from this point; wait for the + * transfer to complete */ + ace->fsm_state = ACE_FSM_STATE_IDENTIFY_TRANSFER; + ace_fsm_yieldirq(ace); + break; + + case ACE_FSM_STATE_IDENTIFY_TRANSFER: + /* Check that the sysace is ready to receive data */ + status = ace_in32(ace, ACE_STATUS); + if (status & ACE_STATUS_CFBSY) { + dev_dbg(ace->dev, "CFBSY set; t=%i iter=%i dc=%i\n", + ace->fsm_task, ace->fsm_iter_num, + ace->data_count); + ace_fsm_yield(ace); + break; + } + if (!(status & ACE_STATUS_DATABUFRDY)) { + ace_fsm_yield(ace); + break; + } + + /* Transfer the next buffer */ + ace->reg_ops->datain(ace); + ace->data_count--; + + /* If there are still buffers to be transferred, jump out here */ + if (ace->data_count != 0) { + ace_fsm_yieldirq(ace); + break; + } + + /* transfer finished; kick state machine */ + dev_dbg(ace->dev, "identify finished\n"); + ace->fsm_state = ACE_FSM_STATE_IDENTIFY_COMPLETE; + break; + + case ACE_FSM_STATE_IDENTIFY_COMPLETE: + ace_fix_driveid(ace->cf_id); + ace_dump_mem(ace->cf_id, 512); /* Debug: Dump out disk ID */ + + if (ace->data_result) { + /* Error occurred, disable the disk */ + ace->media_change = 1; + set_capacity(ace->gd, 0); + dev_err(ace->dev, "error fetching CF id (%i)\n", + ace->data_result); + } else { + ace->media_change = 0; + + /* Record disk parameters */ + set_capacity(ace->gd, + ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY)); + dev_info(ace->dev, "capacity: %i sectors\n", + ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY)); + } + + /* We're done, drop to IDLE state and notify waiters */ + ace->fsm_state = ACE_FSM_STATE_IDLE; + ace->id_result = ace->data_result; + while (ace->id_req_count) { + complete(&ace->id_completion); + ace->id_req_count--; + } + break; + + case ACE_FSM_STATE_REQ_PREPARE: + req = ace_get_next_request(ace->queue); + if (!req) { + ace->fsm_state = ACE_FSM_STATE_IDLE; + break; + } + + /* Okay, it's a data request, set it up for transfer */ + dev_dbg(ace->dev, + "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n", + (unsigned long long)blk_rq_pos(req), + blk_rq_sectors(req), blk_rq_cur_sectors(req), + rq_data_dir(req)); + + ace->req = req; + ace->data_ptr = bio_data(req->bio); + ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; + ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); + + count = blk_rq_sectors(req); + if (rq_data_dir(req)) { + /* Kick off write request */ + dev_dbg(ace->dev, "write data\n"); + ace->fsm_task = ACE_TASK_WRITE; + ace_out(ace, ACE_SECCNTCMD, + count | ACE_SECCNTCMD_WRITE_DATA); + } else { + /* Kick off read request */ + dev_dbg(ace->dev, "read data\n"); + ace->fsm_task = ACE_TASK_READ; + ace_out(ace, ACE_SECCNTCMD, + count | ACE_SECCNTCMD_READ_DATA); + } + + /* As per datasheet, put config controller in reset */ + val = ace_in(ace, ACE_CTRL); + ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET); + + /* Move to the transfer state. 
The systemace will raise + * an interrupt once there is something to do + */ + ace->fsm_state = ACE_FSM_STATE_REQ_TRANSFER; + if (ace->fsm_task == ACE_TASK_READ) + ace_fsm_yieldirq(ace); /* wait for data ready */ + break; + + case ACE_FSM_STATE_REQ_TRANSFER: + /* Check that the sysace is ready to receive data */ + status = ace_in32(ace, ACE_STATUS); + if (status & ACE_STATUS_CFBSY) { + dev_dbg(ace->dev, + "CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n", + ace->fsm_task, ace->fsm_iter_num, + blk_rq_cur_sectors(ace->req) * 16, + ace->data_count, ace->in_irq); + ace_fsm_yield(ace); /* need to poll CFBSY bit */ + break; + } + if (!(status & ACE_STATUS_DATABUFRDY)) { + dev_dbg(ace->dev, + "DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n", + ace->fsm_task, ace->fsm_iter_num, + blk_rq_cur_sectors(ace->req) * 16, + ace->data_count, ace->in_irq); + ace_fsm_yieldirq(ace); + break; + } + + /* Transfer the next buffer */ + if (ace->fsm_task == ACE_TASK_WRITE) + ace->reg_ops->dataout(ace); + else + ace->reg_ops->datain(ace); + ace->data_count--; + + /* If there are still buffers to be transferred, jump out here */ + if (ace->data_count != 0) { + ace_fsm_yieldirq(ace); + break; + } + + /* bio finished; is there another one? */ + if (blk_update_request(ace->req, BLK_STS_OK, + blk_rq_cur_bytes(ace->req))) { + /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", + * blk_rq_sectors(ace->req), + * blk_rq_cur_sectors(ace->req)); + */ + ace->data_ptr = bio_data(ace->req->bio); + ace->data_count = blk_rq_cur_sectors(ace->req) * 16; + ace_fsm_yieldirq(ace); + break; + } + + ace->fsm_state = ACE_FSM_STATE_REQ_COMPLETE; + break; + + case ACE_FSM_STATE_REQ_COMPLETE: + ace->req = NULL; + + /* Finished request; go to idle state */ + ace->fsm_state = ACE_FSM_STATE_IDLE; + break; + + default: + ace->fsm_state = ACE_FSM_STATE_IDLE; + break; + } +} + +static void ace_fsm_tasklet(unsigned long data) +{ + struct ace_device *ace = (void *)data; + unsigned long flags; + + spin_lock_irqsave(&ace->lock, flags); + + /* Loop over state machine until told to stop */ + ace->fsm_continue_flag = 1; + while (ace->fsm_continue_flag) + ace_fsm_dostate(ace); + + spin_unlock_irqrestore(&ace->lock, flags); +} + +static void ace_stall_timer(struct timer_list *t) +{ + struct ace_device *ace = from_timer(ace, t, stall_timer); + unsigned long flags; + + dev_warn(ace->dev, + "kicking stalled fsm; state=%i task=%i iter=%i dc=%i\n", + ace->fsm_state, ace->fsm_task, ace->fsm_iter_num, + ace->data_count); + spin_lock_irqsave(&ace->lock, flags); + + /* Rearm the stall timer *before* entering FSM (which may then + * delete the timer) */ + mod_timer(&ace->stall_timer, jiffies + HZ); + + /* Loop over state machine until told to stop */ + ace->fsm_continue_flag = 1; + while (ace->fsm_continue_flag) + ace_fsm_dostate(ace); + + spin_unlock_irqrestore(&ace->lock, flags); +} + +/* --------------------------------------------------------------------- + * Interrupt handling routines + */ +static int ace_interrupt_checkstate(struct ace_device *ace) +{ + u32 sreg = ace_in32(ace, ACE_STATUS); + u16 creg = ace_in(ace, ACE_CTRL); + + /* Check for error occurrence */ + if ((sreg & (ACE_STATUS_CFGERROR | ACE_STATUS_CFCERROR)) && + (creg & ACE_CTRL_ERRORIRQ)) { + dev_err(ace->dev, "transfer failure\n"); + ace_dump_regs(ace); + return -EIO; + } + + return 0; +} + +static irqreturn_t ace_interrupt(int irq, void *dev_id) +{ + u16 creg; + struct ace_device *ace = dev_id; + + /* be safe and get the lock */ + spin_lock(&ace->lock); + ace->in_irq = 1; + + /* clear the interrupt 
*/ + creg = ace_in(ace, ACE_CTRL); + ace_out(ace, ACE_CTRL, creg | ACE_CTRL_RESETIRQ); + ace_out(ace, ACE_CTRL, creg); + + /* check for IO failures */ + if (ace_interrupt_checkstate(ace)) + ace->data_result = -EIO; + + if (ace->fsm_task == 0) { + dev_err(ace->dev, + "spurious irq; stat=%.8x ctrl=%.8x cmd=%.4x\n", + ace_in32(ace, ACE_STATUS), ace_in32(ace, ACE_CTRL), + ace_in(ace, ACE_SECCNTCMD)); + dev_err(ace->dev, "fsm_task=%i fsm_state=%i data_count=%i\n", + ace->fsm_task, ace->fsm_state, ace->data_count); + } + + /* Loop over state machine until told to stop */ + ace->fsm_continue_flag = 1; + while (ace->fsm_continue_flag) + ace_fsm_dostate(ace); + + /* done with interrupt; drop the lock */ + ace->in_irq = 0; + spin_unlock(&ace->lock); + + return IRQ_HANDLED; +} + +/* --------------------------------------------------------------------- + * Block ops + */ +static blk_status_t ace_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct ace_device *ace = hctx->queue->queuedata; + struct request *req = bd->rq; + + if (blk_rq_is_passthrough(req)) { + blk_mq_start_request(req); + return BLK_STS_IOERR; + } + + spin_lock_irq(&ace->lock); + list_add_tail(&req->queuelist, &ace->rq_list); + spin_unlock_irq(&ace->lock); + + tasklet_schedule(&ace->fsm_tasklet); + return BLK_STS_OK; +} + +static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing) +{ + struct ace_device *ace = gd->private_data; + dev_dbg(ace->dev, "ace_check_events(): %i\n", ace->media_change); + + return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0; +} + +static void ace_media_changed(struct ace_device *ace) +{ + unsigned long flags; + + dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n"); + + spin_lock_irqsave(&ace->lock, flags); + ace->id_req_count++; + spin_unlock_irqrestore(&ace->lock, flags); + + tasklet_schedule(&ace->fsm_tasklet); + wait_for_completion(&ace->id_completion); + + dev_dbg(ace->dev, "revalidate complete\n"); +} + +static int ace_open(struct block_device *bdev, fmode_t mode) +{ + struct ace_device *ace = bdev->bd_disk->private_data; + unsigned long flags; + + dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1); + + mutex_lock(&xsysace_mutex); + spin_lock_irqsave(&ace->lock, flags); + ace->users++; + spin_unlock_irqrestore(&ace->lock, flags); + + if (bdev_check_media_change(bdev) && ace->media_change) + ace_media_changed(ace); + mutex_unlock(&xsysace_mutex); + + return 0; +} + +static void ace_release(struct gendisk *disk, fmode_t mode) +{ + struct ace_device *ace = disk->private_data; + unsigned long flags; + u16 val; + + dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1); + + mutex_lock(&xsysace_mutex); + spin_lock_irqsave(&ace->lock, flags); + ace->users--; + if (ace->users == 0) { + val = ace_in(ace, ACE_CTRL); + ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ); + } + spin_unlock_irqrestore(&ace->lock, flags); + mutex_unlock(&xsysace_mutex); +} + +static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ace_device *ace = bdev->bd_disk->private_data; + u16 *cf_id = ace->cf_id; + + dev_dbg(ace->dev, "ace_getgeo()\n"); + + geo->heads = cf_id[ATA_ID_HEADS]; + geo->sectors = cf_id[ATA_ID_SECTORS]; + geo->cylinders = cf_id[ATA_ID_CYLS]; + + return 0; +} + +static const struct block_device_operations ace_fops = { + .owner = THIS_MODULE, + .open = ace_open, + .release = ace_release, + .check_events = ace_check_events, + .getgeo = ace_getgeo, +}; + +static const struct blk_mq_ops ace_mq_ops = { + .queue_rq = 
ace_queue_rq, +}; + +/* -------------------------------------------------------------------- + * SystemACE device setup/teardown code + */ +static int ace_setup(struct ace_device *ace) +{ + u16 version; + u16 val; + int rc; + + dev_dbg(ace->dev, "ace_setup(ace=0x%p)\n", ace); + dev_dbg(ace->dev, "physaddr=0x%llx irq=%i\n", + (unsigned long long)ace->physaddr, ace->irq); + + spin_lock_init(&ace->lock); + init_completion(&ace->id_completion); + INIT_LIST_HEAD(&ace->rq_list); + + /* + * Map the device + */ + ace->baseaddr = ioremap(ace->physaddr, 0x80); + if (!ace->baseaddr) + goto err_ioremap; + + /* + * Initialize the state machine tasklet and stall timer + */ + tasklet_init(&ace->fsm_tasklet, ace_fsm_tasklet, (unsigned long)ace); + timer_setup(&ace->stall_timer, ace_stall_timer, 0); + + /* + * Initialize the request queue + */ + ace->queue = blk_mq_init_sq_queue(&ace->tag_set, &ace_mq_ops, 2, + BLK_MQ_F_SHOULD_MERGE); + if (IS_ERR(ace->queue)) { + rc = PTR_ERR(ace->queue); + ace->queue = NULL; + goto err_blk_initq; + } + ace->queue->queuedata = ace; + + blk_queue_logical_block_size(ace->queue, 512); + blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH); + + /* + * Allocate and initialize GD structure + */ + ace->gd = alloc_disk(ACE_NUM_MINORS); + if (!ace->gd) + goto err_alloc_disk; + + ace->gd->major = ace_major; + ace->gd->first_minor = ace->id * ACE_NUM_MINORS; + ace->gd->fops = &ace_fops; + ace->gd->events = DISK_EVENT_MEDIA_CHANGE; + ace->gd->queue = ace->queue; + ace->gd->private_data = ace; + snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a'); + + /* set bus width */ + if (ace->bus_width == ACE_BUS_WIDTH_16) { + /* 0x0101 should work regardless of endianness */ + ace_out_le16(ace, ACE_BUSMODE, 0x0101); + + /* read it back to determine endianness */ + if (ace_in_le16(ace, ACE_BUSMODE) == 0x0001) + ace->reg_ops = &ace_reg_le16_ops; + else + ace->reg_ops = &ace_reg_be16_ops; + } else { + ace_out_8(ace, ACE_BUSMODE, 0x00); + ace->reg_ops = &ace_reg_8_ops; + } + + /* Make sure version register is sane */ + version = ace_in(ace, ACE_VERSION); + if ((version == 0) || (version == 0xFFFF)) + goto err_read; + + /* Put sysace in a sane state by clearing most control reg bits */ + ace_out(ace, ACE_CTRL, ACE_CTRL_FORCECFGMODE | + ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ); + + /* Now we can hook up the irq handler */ + if (ace->irq > 0) { + rc = request_irq(ace->irq, ace_interrupt, 0, "systemace", ace); + if (rc) { + /* Failure - fall back to polled mode */ + dev_err(ace->dev, "request_irq failed\n"); + ace->irq = rc; + } + } + + /* Enable interrupts */ + val = ace_in(ace, ACE_CTRL); + val |= ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ; + ace_out(ace, ACE_CTRL, val); + + /* Print the identification */ + dev_info(ace->dev, "Xilinx SystemACE revision %i.%i.%i\n", + (version >> 12) & 0xf, (version >> 8) & 0x0f, version & 0xff); + dev_dbg(ace->dev, "physaddr 0x%llx, mapped to 0x%p, irq=%i\n", + (unsigned long long) ace->physaddr, ace->baseaddr, ace->irq); + + ace->media_change = 1; + ace_media_changed(ace); + + /* Make the sysace device 'live' */ + add_disk(ace->gd); + + return 0; + +err_read: + /* prevent double queue cleanup */ + ace->gd->queue = NULL; + put_disk(ace->gd); +err_alloc_disk: + blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); +err_blk_initq: + iounmap(ace->baseaddr); +err_ioremap: + dev_info(ace->dev, "xsysace: error initializing device at 0x%llx\n", + (unsigned long long) ace->physaddr); + return -ENOMEM; +} + +static void ace_teardown(struct ace_device *ace) 
+{ + if (ace->gd) { + del_gendisk(ace->gd); + put_disk(ace->gd); + } + + if (ace->queue) { + blk_cleanup_queue(ace->queue); + blk_mq_free_tag_set(&ace->tag_set); + } + + tasklet_kill(&ace->fsm_tasklet); + + if (ace->irq > 0) + free_irq(ace->irq, ace); + + iounmap(ace->baseaddr); +} + +static int ace_alloc(struct device *dev, int id, resource_size_t physaddr, + int irq, int bus_width) +{ + struct ace_device *ace; + int rc; + dev_dbg(dev, "ace_alloc(%p)\n", dev); + + /* Allocate and initialize the ace device structure */ + ace = kzalloc(sizeof(struct ace_device), GFP_KERNEL); + if (!ace) { + rc = -ENOMEM; + goto err_alloc; + } + + ace->dev = dev; + ace->id = id; + ace->physaddr = physaddr; + ace->irq = irq; + ace->bus_width = bus_width; + + /* Call the setup code */ + rc = ace_setup(ace); + if (rc) + goto err_setup; + + dev_set_drvdata(dev, ace); + return 0; + +err_setup: + dev_set_drvdata(dev, NULL); + kfree(ace); +err_alloc: + dev_err(dev, "could not initialize device, err=%i\n", rc); + return rc; +} + +static void ace_free(struct device *dev) +{ + struct ace_device *ace = dev_get_drvdata(dev); + dev_dbg(dev, "ace_free(%p)\n", dev); + + if (ace) { + ace_teardown(ace); + dev_set_drvdata(dev, NULL); + kfree(ace); + } +} + +/* --------------------------------------------------------------------- + * Platform Bus Support + */ + +static int ace_probe(struct platform_device *dev) +{ + int bus_width = ACE_BUS_WIDTH_16; /* FIXME: should not be hard coded */ + resource_size_t physaddr; + struct resource *res; + u32 id = dev->id; + int irq; + int i; + + dev_dbg(&dev->dev, "ace_probe(%p)\n", dev); + + /* device id and bus width */ + if (of_property_read_u32(dev->dev.of_node, "port-number", &id)) + id = 0; + if (of_find_property(dev->dev.of_node, "8-bit", NULL)) + bus_width = ACE_BUS_WIDTH_8; + + res = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + physaddr = res->start; + if (!physaddr) + return -ENODEV; + + irq = platform_get_irq_optional(dev, 0); + + /* Call the bus-independent setup code */ + return ace_alloc(&dev->dev, id, physaddr, irq, bus_width); +} + +/* + * Platform bus remove() method + */ +static int ace_remove(struct platform_device *dev) +{ + ace_free(&dev->dev); + return 0; +} + +#if defined(CONFIG_OF) +/* Match table for of_platform binding */ +static const struct of_device_id ace_of_match[] = { + { .compatible = "xlnx,opb-sysace-1.00.b", }, + { .compatible = "xlnx,opb-sysace-1.00.c", }, + { .compatible = "xlnx,xps-sysace-1.00.a", }, + { .compatible = "xlnx,sysace", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ace_of_match); +#else /* CONFIG_OF */ +#define ace_of_match NULL +#endif /* CONFIG_OF */ + +static struct platform_driver ace_platform_driver = { + .probe = ace_probe, + .remove = ace_remove, + .driver = { + .name = "xsysace", + .of_match_table = ace_of_match, + }, +}; + +/* --------------------------------------------------------------------- + * Module init/exit routines + */ +static int __init ace_init(void) +{ + int rc; + + ace_major = register_blkdev(ace_major, "xsysace"); + if (ace_major <= 0) { + rc = -ENOMEM; + goto err_blk; + } + + rc = platform_driver_register(&ace_platform_driver); + if (rc) + goto err_plat; + + pr_info("Xilinx SystemACE device driver, major=%i\n", ace_major); + return 0; + +err_plat: + unregister_blkdev(ace_major, "xsysace"); +err_blk: + printk(KERN_ERR "xsysace: registration failed; err=%i\n", rc); + return rc; +} +module_init(ace_init); + +static void __exit ace_exit(void) +{ + pr_debug("Unregistering Xilinx SystemACE 
driver\n"); + platform_driver_unregister(&ace_platform_driver); + unregister_blkdev(ace_major, "xsysace"); +} +module_exit(ace_exit); diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 5b6805d87fcff..dd3c0626c72d8 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -382,7 +382,7 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, /* The first 4 bytes indicates the Intel PCIe specific packet type */ pdata = skb_pull_data(skb, BTINTEL_PCIE_HCI_TYPE_LEN); - if (!data) { + if (!pdata) { bt_dev_err(hdev, "Corrupted packet received"); ret = -EILSEQ; goto exit_error; diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 9d0c7e278114b..9bfa9a6ad56c8 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -281,7 +281,7 @@ static u8 crc8_table[CRC8_TABLE_SIZE]; /* Default configurations */ #define DEFAULT_H2C_WAKEUP_MODE WAKEUP_METHOD_BREAK -#define DEFAULT_PS_MODE PS_MODE_DISABLE +#define DEFAULT_PS_MODE PS_MODE_ENABLE #define FW_INIT_BAUDRATE HCI_NXP_PRI_BAUDRATE static struct sk_buff *nxp_drv_send_cmd(struct hci_dev *hdev, u16 opcode, diff --git a/drivers/bluetooth/hci_bcm4377.c b/drivers/bluetooth/hci_bcm4377.c index 0c2f15235b4cd..d90858ea2fe59 100644 --- a/drivers/bluetooth/hci_bcm4377.c +++ b/drivers/bluetooth/hci_bcm4377.c @@ -495,6 +495,10 @@ struct bcm4377_data; * extended scanning * broken_mws_transport_config: Set to true if the chip erroneously claims to * support MWS Transport Configuration + * broken_le_ext_adv_report_phy: Set to true if this chip stuffs flags inside + * reserved bits of Primary/Secondary_PHY inside + * LE Extended Advertising Report events which + * have to be ignored * send_calibration: Optional callback to send calibration data * send_ptb: Callback to send "PTB" regulatory/calibration data */ @@ -513,6 +517,7 @@ struct bcm4377_hw { unsigned long broken_ext_scan : 1; unsigned long broken_mws_transport_config : 1; unsigned long broken_le_coded : 1; + unsigned long broken_le_ext_adv_report_phy : 1; int (*send_calibration)(struct bcm4377_data *bcm4377); int (*send_ptb)(struct bcm4377_data *bcm4377, @@ -716,7 +721,7 @@ static void bcm4377_handle_ack(struct bcm4377_data *bcm4377, ring->events[msgid] = NULL; } - bitmap_release_region(ring->msgids, msgid, ring->n_entries); + bitmap_release_region(ring->msgids, msgid, 0); unlock: spin_unlock_irqrestore(&ring->lock, flags); @@ -2373,6 +2378,8 @@ static int bcm4377_probe(struct pci_dev *pdev, const struct pci_device_id *id) set_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &hdev->quirks); if (bcm4377->hw->broken_le_coded) set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); + if (bcm4377->hw->broken_le_ext_adv_report_phy) + set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks); pci_set_drvdata(pdev, bcm4377); hci_set_drvdata(hdev, bcm4377); @@ -2477,6 +2484,7 @@ static const struct bcm4377_hw bcm4377_hw_variants[] = { .clear_pciecfg_subsystem_ctrl_bit19 = true, .broken_mws_transport_config = true, .broken_le_coded = true, + .broken_le_ext_adv_report_phy = true, .send_calibration = bcm4387_send_calibration, .send_ptb = bcm4378_send_ptb, }, diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0c9c9ee56592d..9a0bc86f9aace 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2450,15 +2450,27 @@ static void qca_serdev_shutdown(struct device *dev) struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); struct hci_uart *hu = &qcadev->serdev_hu; 
struct hci_dev *hdev = hu->hdev; - struct qca_data *qca = hu->priv; const u8 ibs_wake_cmd[] = { 0xFD }; const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 }; if (qcadev->btsoc_type == QCA_QCA6390) { - if (test_bit(QCA_BT_OFF, &qca->flags) || - !test_bit(HCI_RUNNING, &hdev->flags)) + /* The purpose of sending the VSC is to reset the SoC into an + * initial state, a state that ensures the next hdev->setup() + * succeeds. + * If HCI_QUIRK_NON_PERSISTENT_SETUP is set, hdev->setup() can + * do its job regardless of the SoC state, so there is no need + * to send the VSC. + * If HCI_SETUP is set, hdev->setup() was never invoked and the + * SoC is already in the initial state, so the VSC is not + * needed either. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks) || + hci_dev_test_flag(hdev, HCI_SETUP)) return; + /* The serdev must be in the open state when the control logic + * arrives here; this also avoids the use-after-free that occurs + * when the serdev is flushed or written to after it has been + * closed. + */ serdev_device_write_flush(serdev); ret = serdev_device_write_buf(serdev, ibs_wake_cmd, sizeof(ibs_wake_cmd)); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index d51fc8321d411..da32e8ed08309 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -269,8 +269,13 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) if (!devp->hd_ireqfreq) return -EIO; - if (count < sizeof(unsigned long)) - return -EINVAL; + if (in_compat_syscall()) { + if (count < sizeof(compat_ulong_t)) + return -EINVAL; + } else { + if (count < sizeof(unsigned long)) + return -EINVAL; + } add_wait_queue(&devp->hd_waitqueue, &wait); @@ -294,9 +299,16 @@ hpet_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) schedule(); } - retval = put_user(data, (unsigned long __user *)buf); - if (!retval) - retval = sizeof(unsigned long); + if (in_compat_syscall()) { + retval = put_user(data, (compat_ulong_t __user *)buf); + if (!retval) + retval = sizeof(compat_ulong_t); + } else { + retval = put_user(data, (unsigned long __user *)buf); + if (!retval) + retval = sizeof(unsigned long); + } + out: __set_current_state(TASK_RUNNING); remove_wait_queue(&devp->hd_waitqueue, &wait); @@ -651,12 +663,24 @@ struct compat_hpet_info { unsigned short hi_timer; }; +/* 32-bit types would lead to different command codes which should be + * translated into 64-bit ones before being passed to hpet_ioctl_common() + */ +#define COMPAT_HPET_INFO _IOR('h', 0x03, struct compat_hpet_info) +#define COMPAT_HPET_IRQFREQ _IOW('h', 0x6, compat_ulong_t) + static long hpet_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct hpet_info info; int err; + if (cmd == COMPAT_HPET_INFO) + cmd = HPET_INFO; + + if (cmd == COMPAT_HPET_IRQFREQ) + cmd = HPET_IRQFREQ; + mutex_lock(&hpet_mutex); err = hpet_ioctl_common(file->private_data, cmd, arg, &info); mutex_unlock(&hpet_mutex); diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 4c695b0388f3e..9bb142c75243f 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -16,8 +16,8 @@ tpm-y += eventlog/common.o tpm-y += eventlog/tpm1.o tpm-y += eventlog/tpm2.o tpm-y += tpm-buf.o +tpm-y += tpm2-sessions.o -tpm-$(CONFIG_TCG_TPM2_HMAC) += tpm2-sessions.o tpm-$(CONFIG_ACPI) += tpm_ppi.o eventlog/acpi.o tpm-$(CONFIG_EFI) += eventlog/efi.o tpm-$(CONFIG_OF) += eventlog/of.o diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 907ac9956a78f..2281d55df5456 --- 
a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -83,9 +83,6 @@ #define AES_KEY_BYTES AES_KEYSIZE_128 #define AES_KEY_BITS (AES_KEY_BYTES*8) -static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, - u32 *handle, u8 *name); - /* * This is the structure that carries all the auth information (like * session handle, nonces, session key and auth) from use to use it is @@ -148,6 +145,7 @@ struct tpm2_auth { u8 name[AUTH_MAX_NAMES][2 + SHA512_DIGEST_SIZE]; }; +#ifdef CONFIG_TCG_TPM2_HMAC /* * Name Size based on TPM algorithm (assumes no hash bigger than 255) */ @@ -163,6 +161,226 @@ static u8 name_size(const u8 *name) return size_map[alg] + 2; } +static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + off_t offset = TPM_HEADER_SIZE; + u32 tot_len = be32_to_cpu(head->length); + u32 val; + + /* we're starting after the header so adjust the length */ + tot_len -= TPM_HEADER_SIZE; + + /* skip public */ + val = tpm_buf_read_u16(buf, &offset); + if (val > tot_len) + return -EINVAL; + offset += val; + /* name */ + val = tpm_buf_read_u16(buf, &offset); + if (val != name_size(&buf->data[offset])) + return -EINVAL; + memcpy(name, &buf->data[offset], val); + /* forget the rest */ + return 0; +} + +static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) +{ + struct tpm_buf buf; + int rc; + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, handle); + rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); + if (rc == TPM2_RC_SUCCESS) + rc = tpm2_parse_read_public(name, &buf); + + tpm_buf_destroy(&buf); + + return rc; +} +#endif /* CONFIG_TCG_TPM2_HMAC */ + +/** + * tpm_buf_append_name() - add a handle area to the buffer + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @handle: The handle to be appended + * @name: The name of the handle (may be NULL) + * + * In order to compute session HMACs, we need to know the names of the + * objects pointed to by the handles. For most objects, this is simply + * the actual 4 byte handle or an empty buf (in these cases @name + * should be NULL) but for volatile objects, permanent objects and NV + * areas, the name is defined as the hash (according to the name + * algorithm which should be set to sha256) of the public area to + * which the two byte algorithm id has been appended. For these + * objects, the @name pointer should point to this. If a name is + * required but @name is NULL, then TPM2_ReadPublic() will be called + * on the handle to obtain the name. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. 
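+ * + * For example, a caller sealing data to a key handle can pass a NULL + * @name and let the helper fetch the name itself (a sketch mirroring + * the trusted-key code; the names are illustrative): + * + *	tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);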
+ */ +void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, + u32 handle, u8 *name) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + enum tpm2_mso_type mso = tpm2_handle_mso(handle); + struct tpm2_auth *auth; + int slot; +#endif + + if (!tpm2_chip_auth(chip)) { + tpm_buf_append_u32(buf, handle); + /* count the number of handles in the upper bits of flags */ + buf->handles++; + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4; + if (slot >= AUTH_MAX_NAMES) { + dev_err(&chip->dev, "TPM: too many handles\n"); + return; + } + auth = chip->auth; + WARN(auth->session != tpm_buf_length(buf), + "name added in wrong place\n"); + tpm_buf_append_u32(buf, handle); + auth->session += 4; + + if (mso == TPM2_MSO_PERSISTENT || + mso == TPM2_MSO_VOLATILE || + mso == TPM2_MSO_NVRAM) { + if (!name) + tpm2_read_public(chip, handle, auth->name[slot]); + } else { + if (name) + dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); + } + + auth->name_h[slot] = handle; + if (name) + memcpy(auth->name[slot], name, name_size(name)); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_name); + +/** + * tpm_buf_append_hmac_session() - Append a TPM session element + * @chip: the TPM chip structure + * @buf: The buffer to be appended + * @attributes: The session attributes + * @passphrase: The session authority (NULL if none) + * @passphrase_len: The length of the session authority (0 if none) + * + * This fills in a session structure in the TPM command buffer, except + * for the HMAC which cannot be computed until the command buffer is + * complete. The type of session is controlled by the @attributes, + * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the + * session won't terminate after tpm_buf_check_hmac_response(), + * TPM2_SA_DECRYPT which means this buffer's first parameter should be + * encrypted with a session key and TPM2_SA_ENCRYPT, which means the + * response buffer's first parameter needs to be decrypted (confusing, + * but the defines are written from the point of view of the TPM). + * + * Any session appended by this command must be finalized by calling + * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect + * and the TPM will reject the command. + * + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. 
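+ * + * For example, a seal operation can ask for its first parameter (the + * data being sealed) to travel encrypted with the session key (a + * sketch after the trusted-key code; the names are illustrative): + * + *	tpm_buf_append_hmac_session(chip, &buf, TPM2_SA_DECRYPT, + *				    options->keyauth, TPM_DIGEST_SIZE);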
+ */ +void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphrase_len) +{ +#ifdef CONFIG_TCG_TPM2_HMAC + u8 nonce[SHA256_DIGEST_SIZE]; + struct tpm2_auth *auth; + u32 len; +#endif + + if (!tpm2_chip_auth(chip)) { + /* offset tells us where the sessions area begins */ + int offset = buf->handles * 4 + TPM_HEADER_SIZE; + u32 len = 9 + passphrase_len; + + if (tpm_buf_length(buf) != offset) { + /* not the first session so update the existing length */ + len += get_unaligned_be32(&buf->data[offset]); + put_unaligned_be32(len, &buf->data[offset]); + } else { + tpm_buf_append_u32(buf, len); + } + /* auth handle */ + tpm_buf_append_u32(buf, TPM2_RS_PW); + /* nonce */ + tpm_buf_append_u16(buf, 0); + /* attributes */ + tpm_buf_append_u8(buf, 0); + /* passphrase */ + tpm_buf_append_u16(buf, passphrase_len); + tpm_buf_append(buf, passphrase, passphrase_len); + return; + } + +#ifdef CONFIG_TCG_TPM2_HMAC + /* + * The Architecture Guide requires us to strip trailing zeros + * before computing the HMAC + */ + while (passphrase && passphrase_len > 0 && passphrase[passphrase_len - 1] == '\0') + passphrase_len--; + + auth = chip->auth; + auth->attrs = attributes; + auth->passphrase_len = passphrase_len; + if (passphrase_len) + memcpy(auth->passphrase, passphrase, passphrase_len); + + if (auth->session != tpm_buf_length(buf)) { + /* we're not the first session */ + len = get_unaligned_be32(&buf->data[auth->session]); + if (4 + len + auth->session != tpm_buf_length(buf)) { + WARN(1, "session length mismatch, cannot append"); + return; + } + + /* add our new session */ + len += 9 + 2 * SHA256_DIGEST_SIZE; + put_unaligned_be32(len, &buf->data[auth->session]); + } else { + tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); + } + + /* random number for our nonce */ + get_random_bytes(nonce, sizeof(nonce)); + memcpy(auth->our_nonce, nonce, sizeof(nonce)); + tpm_buf_append_u32(buf, auth->handle); + /* our new nonce */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); + tpm_buf_append_u8(buf, auth->attrs); + /* and put a placeholder for the hmac */ + tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); + tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); +#endif +} +EXPORT_SYMBOL_GPL(tpm_buf_append_hmac_session); + +#ifdef CONFIG_TCG_TPM2_HMAC + +static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy, + u32 *handle, u8 *name); + /* * It turns out the crypto hmac(sha256) is hard for us to consume * because it assumes a fixed key and the TPM seems to change the key @@ -343,82 +561,6 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip) crypto_free_kpp(kpp); } -/** - * tpm_buf_append_hmac_session() - Append a TPM session element - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @attributes: The session attributes - * @passphrase: The session authority (NULL if none) - * @passphrase_len: The length of the session authority (0 if none) - * - * This fills in a session structure in the TPM command buffer, except - * for the HMAC which cannot be computed until the command buffer is - * complete. 
The type of session is controlled by the @attributes, - * the main ones of which are TPM2_SA_CONTINUE_SESSION which means the - * session won't terminate after tpm_buf_check_hmac_response(), - * TPM2_SA_DECRYPT which means this buffers first parameter should be - * encrypted with a session key and TPM2_SA_ENCRYPT, which means the - * response buffer's first parameter needs to be decrypted (confusing, - * but the defines are written from the point of view of the TPM). - * - * Any session appended by this command must be finalized by calling - * tpm_buf_fill_hmac_session() otherwise the HMAC will be incorrect - * and the TPM will reject the command. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. - */ -void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphrase_len) -{ - u8 nonce[SHA256_DIGEST_SIZE]; - u32 len; - struct tpm2_auth *auth = chip->auth; - - /* - * The Architecture Guide requires us to strip trailing zeros - * before computing the HMAC - */ - while (passphrase && passphrase_len > 0 - && passphrase[passphrase_len - 1] == '\0') - passphrase_len--; - - auth->attrs = attributes; - auth->passphrase_len = passphrase_len; - if (passphrase_len) - memcpy(auth->passphrase, passphrase, passphrase_len); - - if (auth->session != tpm_buf_length(buf)) { - /* we're not the first session */ - len = get_unaligned_be32(&buf->data[auth->session]); - if (4 + len + auth->session != tpm_buf_length(buf)) { - WARN(1, "session length mismatch, cannot append"); - return; - } - - /* add our new session */ - len += 9 + 2 * SHA256_DIGEST_SIZE; - put_unaligned_be32(len, &buf->data[auth->session]); - } else { - tpm_buf_append_u32(buf, 9 + 2 * SHA256_DIGEST_SIZE); - } - - /* random number for our nonce */ - get_random_bytes(nonce, sizeof(nonce)); - memcpy(auth->our_nonce, nonce, sizeof(nonce)); - tpm_buf_append_u32(buf, auth->handle); - /* our new nonce */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); - tpm_buf_append_u8(buf, auth->attrs); - /* and put a placeholder for the hmac */ - tpm_buf_append_u16(buf, SHA256_DIGEST_SIZE); - tpm_buf_append(buf, nonce, SHA256_DIGEST_SIZE); -} -EXPORT_SYMBOL(tpm_buf_append_hmac_session); - /** * tpm_buf_fill_hmac_session() - finalize the session HMAC * @chip: the TPM chip structure @@ -449,6 +591,9 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) u8 cphash[SHA256_DIGEST_SIZE]; struct sha256_state sctx; + if (!auth) + return; + /* save the command code in BE format */ auth->ordinal = head->ordinal; @@ -567,104 +712,6 @@ void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) } EXPORT_SYMBOL(tpm_buf_fill_hmac_session); -static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - off_t offset = TPM_HEADER_SIZE; - u32 tot_len = be32_to_cpu(head->length); - u32 val; - - /* we're starting after the header so adjust the length */ - tot_len -= TPM_HEADER_SIZE; - - /* skip public */ - val = tpm_buf_read_u16(buf, &offset); - if (val > tot_len) - return -EINVAL; - offset += val; - /* name */ - val = tpm_buf_read_u16(buf, &offset); - if (val != name_size(&buf->data[offset])) - return -EINVAL; - memcpy(name, &buf->data[offset], val); - /* forget the rest */ - return 0; -} - -static int tpm2_read_public(struct tpm_chip *chip, u32 handle, 
char *name) -{ - struct tpm_buf buf; - int rc; - - rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); - if (rc) - return rc; - - tpm_buf_append_u32(&buf, handle); - rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); - if (rc == TPM2_RC_SUCCESS) - rc = tpm2_parse_read_public(name, &buf); - - tpm_buf_destroy(&buf); - - return rc; -} - -/** - * tpm_buf_append_name() - add a handle area to the buffer - * @chip: the TPM chip structure - * @buf: The buffer to be appended - * @handle: The handle to be appended - * @name: The name of the handle (may be NULL) - * - * In order to compute session HMACs, we need to know the names of the - * objects pointed to by the handles. For most objects, this is simply - * the actual 4 byte handle or an empty buf (in these cases @name - * should be NULL) but for volatile objects, permanent objects and NV - * areas, the name is defined as the hash (according to the name - * algorithm which should be set to sha256) of the public area to - * which the two byte algorithm id has been appended. For these - * objects, the @name pointer should point to this. If a name is - * required but @name is NULL, then TPM2_ReadPublic() will be called - * on the handle to obtain the name. - * - * As with most tpm_buf operations, success is assumed because failure - * will be caused by an incorrect programming model and indicated by a - * kernel message. - */ -void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, - u32 handle, u8 *name) -{ - enum tpm2_mso_type mso = tpm2_handle_mso(handle); - struct tpm2_auth *auth = chip->auth; - int slot; - - slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE)/4; - if (slot >= AUTH_MAX_NAMES) { - dev_err(&chip->dev, "TPM: too many handles\n"); - return; - } - WARN(auth->session != tpm_buf_length(buf), - "name added in wrong place\n"); - tpm_buf_append_u32(buf, handle); - auth->session += 4; - - if (mso == TPM2_MSO_PERSISTENT || - mso == TPM2_MSO_VOLATILE || - mso == TPM2_MSO_NVRAM) { - if (!name) - tpm2_read_public(chip, handle, auth->name[slot]); - } else { - if (name) - dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); - } - - auth->name_h[slot] = handle; - if (name) - memcpy(auth->name[slot], name, name_size(name)); -} -EXPORT_SYMBOL(tpm_buf_append_name); - /** * tpm_buf_check_hmac_response() - check the TPM return HMAC for correctness * @chip: the TPM chip structure @@ -705,6 +752,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, u32 cc = be32_to_cpu(auth->ordinal); int parm_len, len, i, handles; + if (!auth) + return rc; + if (auth->session >= TPM_HEADER_SIZE) { WARN(1, "tpm session not filled correctly\n"); goto out; @@ -824,8 +874,13 @@ EXPORT_SYMBOL(tpm_buf_check_hmac_response); */ void tpm2_end_auth_session(struct tpm_chip *chip) { - tpm2_flush_context(chip, chip->auth->handle); - memzero_explicit(chip->auth, sizeof(*chip->auth)); + struct tpm2_auth *auth = chip->auth; + + if (!auth) + return; + + tpm2_flush_context(chip, auth->handle); + memzero_explicit(auth, sizeof(*auth)); } EXPORT_SYMBOL(tpm2_end_auth_session); @@ -907,6 +962,11 @@ int tpm2_start_auth_session(struct tpm_chip *chip) int rc; u32 null_key; + if (!auth) { + dev_warn_once(&chip->dev, "auth session is not active\n"); + return 0; + } + rc = tpm2_load_null(chip, &null_key); if (rc) goto out; @@ -1301,3 +1361,4 @@ int tpm2_sessions_init(struct tpm_chip *chip) return rc; } +#endif /* CONFIG_TCG_TPM2_HMAC */ diff --git a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c 
b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c index ba504e19d4203..62d876e150e11 100644 --- a/drivers/clk/mediatek/clk-mt8183-mfgcfg.c +++ b/drivers/clk/mediatek/clk-mt8183-mfgcfg.c @@ -29,6 +29,7 @@ static const struct mtk_gate mfg_clks[] = { static const struct mtk_clk_desc mfg_desc = { .clks = mfg_clks, .num_clks = ARRAY_SIZE(mfg_clks), + .need_runtime_pm = true, }; static const struct of_device_id of_match_clk_mt8183_mfg[] = { diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c index bd37ab4d1a9bb..ba1d1c495bc2b 100644 --- a/drivers/clk/mediatek/clk-mtk.c +++ b/drivers/clk/mediatek/clk-mtk.c @@ -496,14 +496,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, } - devm_pm_runtime_enable(&pdev->dev); - /* - * Do a pm_runtime_resume_and_get() to workaround a possible - * deadlock between clk_register() and the genpd framework. - */ - r = pm_runtime_resume_and_get(&pdev->dev); - if (r) - return r; + if (mcd->need_runtime_pm) { + devm_pm_runtime_enable(&pdev->dev); + /* + * Do a pm_runtime_resume_and_get() to workaround a possible + * deadlock between clk_register() and the genpd framework. + */ + r = pm_runtime_resume_and_get(&pdev->dev); + if (r) + return r; + } /* Calculate how many clk_hw_onecell_data entries to allocate */ num_clks = mcd->num_clks + mcd->num_composite_clks; @@ -585,7 +587,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, goto unregister_clks; } - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; @@ -618,7 +621,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev, if (mcd->shared_io && base) iounmap(base); - pm_runtime_put(&pdev->dev); + if (mcd->need_runtime_pm) + pm_runtime_put(&pdev->dev); return r; } diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h index 22096501a60a7..c17fe1c2d732d 100644 --- a/drivers/clk/mediatek/clk-mtk.h +++ b/drivers/clk/mediatek/clk-mtk.h @@ -237,6 +237,8 @@ struct mtk_clk_desc { int (*clk_notifier_func)(struct device *dev, struct clk *clk); unsigned int mfg_clk_idx; + + bool need_runtime_pm; }; int mtk_clk_pdev_probe(struct platform_device *pdev); diff --git a/drivers/clk/qcom/apss-ipq-pll.c b/drivers/clk/qcom/apss-ipq-pll.c index 5f7f537e4ecbe..e8632db2c5428 100644 --- a/drivers/clk/qcom/apss-ipq-pll.c +++ b/drivers/clk/qcom/apss-ipq-pll.c @@ -70,7 +70,6 @@ static struct clk_alpha_pll ipq_pll_stromer_plus = { static const struct alpha_pll_config ipq5018_pll_config = { .l = 0x2a, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), @@ -84,7 +83,6 @@ static const struct alpha_pll_config ipq5018_pll_config = { static const struct alpha_pll_config ipq5332_pll_config = { .l = 0x2d, .config_ctl_val = 0x4001075b, - .config_ctl_hi_val = 0x304, .main_output_mask = BIT(0), .aux_output_mask = BIT(1), .early_output_mask = BIT(3), diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index d4227909d1fe1..c51647e37df8e 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -2574,6 +2574,9 @@ static int clk_alpha_pll_stromer_plus_set_rate(struct clk_hw *hw, regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL_U(pll), a >> ALPHA_BITWIDTH); + regmap_update_bits(pll->clkr.regmap, PLL_USER_CTL(pll), + PLL_ALPHA_EN, PLL_ALPHA_EN); + regmap_write(pll->clkr.regmap, PLL_MODE(pll), PLL_BYPASSNL); /* Wait five micro seconds or more */ diff --git a/drivers/clk/qcom/gcc-ipq9574.c 
b/drivers/clk/qcom/gcc-ipq9574.c index 0a3f846695b80..f8b9a1e93bef2 100644 --- a/drivers/clk/qcom/gcc-ipq9574.c +++ b/drivers/clk/qcom/gcc-ipq9574.c @@ -2140,9 +2140,10 @@ static struct clk_rcg2 pcnoc_bfdcd_clk_src = { static struct clk_branch gcc_crypto_axi_clk = { .halt_reg = 0x16010, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16010, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(15), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_axi_clk", .parent_hws = (const struct clk_hw *[]) { @@ -2156,9 +2157,10 @@ static struct clk_branch gcc_crypto_axi_clk = { static struct clk_branch gcc_crypto_ahb_clk = { .halt_reg = 0x16014, + .halt_check = BRANCH_HALT_VOTED, .clkr = { - .enable_reg = 0x16014, - .enable_mask = BIT(0), + .enable_reg = 0xb004, + .enable_mask = BIT(16), .hw.init = &(const struct clk_init_data) { .name = "gcc_crypto_ahb_clk", .parent_hws = (const struct clk_hw *[]) { diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index cf4a7b6e0b23a..0559a33faf00e 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -100,8 +100,8 @@ static struct clk_alpha_pll gpll6 = { .enable_mask = BIT(6), .hw.init = &(struct clk_init_data){ .name = "gpll6", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, @@ -124,7 +124,7 @@ static struct clk_alpha_pll_postdiv gpll6_out_even = { .clkr.hw.init = &(struct clk_init_data){ .name = "gpll6_out_even", .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + &gpll6.clkr.hw, }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_fabia_ops, @@ -139,8 +139,8 @@ static struct clk_alpha_pll gpll7 = { .enable_mask = BIT(7), .hw.init = &(struct clk_init_data){ .name = "gpll7", - .parent_hws = (const struct clk_hw*[]){ - &gpll0.clkr.hw, + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", }, .num_parents = 1, .ops = &clk_alpha_pll_fixed_fabia_ops, diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index ac0091b4ce242..be375ce0149c8 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -132,7 +132,6 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, for (i = 0; i < desc->hw_clks->num ; i++) { struct clk_hw *hw = desc->hw_clks->hws[i]; - struct ccu_common *common = hw_to_ccu_common(hw); const char *name; if (!hw) @@ -147,14 +146,21 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, pr_err("Couldn't register clock %d - %s\n", i, name); goto err_clk_unreg; } + } + + for (i = 0; i < desc->num_ccu_clks; i++) { + struct ccu_common *cclk = desc->ccu_clks[i]; + + if (!cclk) + continue; - if (common->max_rate) - clk_hw_set_rate_range(hw, common->min_rate, - common->max_rate); + if (cclk->max_rate) + clk_hw_set_rate_range(&cclk->hw, cclk->min_rate, + cclk->max_rate); else - WARN(common->min_rate, + WARN(cclk->min_rate, "No max_rate, ignoring min_rate of clock %d - %s\n", - i, name); + i, clk_hw_get_name(&cclk->hw)); } ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 37f1cdf46d291..4ac3a35dcd983 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -890,8 +890,10 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != 
freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) + if (acpi_cpufreq_driver.set_boost) { set_boost(policy, acpi_cpufreq_driver.boost_enabled); + policy->boost_enabled = acpi_cpufreq_driver.boost_enabled; + } return result; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a45aac17c20f0..9e5060b278648 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1431,7 +1431,8 @@ static int cpufreq_online(unsigned int cpu) } /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - policy->boost_enabled = cpufreq_boost_enabled() && policy_has_boost_freq(policy); + if (cpufreq_boost_enabled() && policy_has_boost_freq(policy)) + policy->boost_enabled = true; /* * The initialization has succeeded and the policy is online. diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 784843fa2a22b..3df10517a3278 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -52,6 +52,14 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) struct cxl_dport *dport = NULL; int single_port_map[1]; unsigned long index; + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + + /* + * Capability checks are moot for passthrough decoders, support + * any and all possibilities. + */ + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; cxlsd = cxl_switch_decoder_alloc(port, 1); if (IS_ERR(cxlsd)) @@ -79,6 +87,11 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(11, 8); if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap)) cxlhdm->interleave_mask |= GENMASK(14, 12); + cxlhdm->iw_cap_mask = BIT(1) | BIT(2) | BIT(4) | BIT(8); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap)) + cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap)) + cxlhdm->iw_cap_mask |= BIT(16); } static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index e69625a8d6a1d..c00f3a933164f 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -62,10 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data) return is_cxl_nvdimm_bridge(dev); } -struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd) +/** + * cxl_find_nvdimm_bridge() - find a bridge device relative to a port + * @port: any descendant port of an nvdimm-bridge associated + * root-cxl-port + */ +struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port) { - struct cxl_root *cxl_root __free(put_cxl_root) = - find_cxl_root(cxlmd->endpoint); + struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); struct device *dev; if (!cxl_root) @@ -242,18 +246,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd) /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm + * @parent_port: parent port for the (to be added) @cxlmd endpoint port * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations * * Return: 0 on success negative error code on failure. 
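+ * + * Callers pass the endpoint's parent port, as cxl_mem_probe() does: + * + *	devm_cxl_add_nvdimm(parent_port, cxlmd);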
*/ -int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) +int devm_cxl_add_nvdimm(struct cxl_port *parent_port, + struct cxl_memdev *cxlmd) { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(cxlmd); + cxl_nvb = cxl_find_nvdimm_bridge(parent_port); if (!cxl_nvb) return -ENODEV; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3c2b6144be23c..538ebd5a64fd9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1101,6 +1101,26 @@ static int cxl_port_attach_region(struct cxl_port *port, } cxld = cxl_rr->decoder; + /* + * the number of targets should not exceed the target_count + * of the decoder + */ + if (is_switch_decoder(&cxld->dev)) { + struct cxl_switch_decoder *cxlsd; + + cxlsd = to_cxl_switch_decoder(&cxld->dev); + if (cxl_rr->nr_targets > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s %s add: %s:%s @ %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), dev_name(&cxlmd->dev), + dev_name(&cxled->cxld.dev), pos, + cxlsd->nr_targets); + rc = -ENXIO; + goto out_erase; + } + } + rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { dev_dbg(&cxlr->dev, @@ -1210,6 +1230,50 @@ static int check_last_peer(struct cxl_endpoint_decoder *cxled, return 0; } +static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig) +{ + struct cxl_port *port = to_cxl_port(cxld->dev.parent); + struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); + unsigned int interleave_mask; + u8 eiw; + u16 eig; + int high_pos, low_pos; + + if (!test_bit(iw, &cxlhdm->iw_cap_mask)) + return -ENXIO; + /* + * Per CXL specification r3.1(8.2.4.20.13 Decoder Protection), + * if eiw < 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + 8 + eiw] + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 0, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. + * + * if eiw >= 8: + * DPAOFFSET[51: eig + 8] = HPAOFFSET[51: eig + eiw] / 3 + * DPAOFFSET[eig + 7: 0] = HPAOFFSET[eig + 7: 0] + * + * when the eiw is 8, all the bits of HPAOFFSET[51: 0] are used, the + * interleave bits are none. 
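+ * + * Worked example (values chosen for illustration): iw = 4 and ig = 256 + * give eiw = 2 and eig = 0, so the interleave bits are HPAOFFSET[9: 8] + * and the decoder's interleave_mask must cover GENMASK(9, 8).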
+ */ + ways_to_eiw(iw, &eiw); + if (eiw == 0 || eiw == 8) + return 0; + + granularity_to_eig(ig, &eig); + if (eiw > 8) + high_pos = eiw + eig - 1; + else + high_pos = eiw + eig + 7; + low_pos = eig + 8; + interleave_mask = GENMASK(high_pos, low_pos); + if (interleave_mask & ~cxlhdm->interleave_mask) + return -ENXIO; + + return 0; +} + static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) @@ -1360,6 +1424,15 @@ static int cxl_port_setup_targets(struct cxl_port *port, return -ENXIO; } } else { + rc = check_interleave_cap(cxld, iw, ig); + if (rc) { + dev_dbg(&cxlr->dev, + "%s:%s iw: %d ig: %d is not supported\n", + dev_name(port->uport_dev), + dev_name(&port->dev), iw, ig); + return rc; + } + cxld->interleave_ways = iw; cxld->interleave_granularity = ig; cxld->hpa_range = (struct range) { @@ -1796,6 +1869,15 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_dport *dport; int rc = -ENXIO; + rc = check_interleave_cap(&cxled->cxld, p->interleave_ways, + p->interleave_granularity); + if (rc) { + dev_dbg(&cxlr->dev, "%s iw: %d ig: %d is not supported\n", + dev_name(&cxled->cxld.dev), p->interleave_ways, + p->interleave_granularity); + return rc; + } + if (cxled->mode != cxlr->mode) { dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n", dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode); @@ -2688,22 +2770,33 @@ static int __cxl_dpa_to_region(struct device *dev, void *arg) { struct cxl_dpa_to_region_context *ctx = arg; struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; u64 dpa = ctx->dpa; if (!is_endpoint_decoder(dev)) return 0; cxled = to_cxl_endpoint_decoder(dev); - if (!cxled->dpa_res || !resource_size(cxled->dpa_res)) + if (!cxled || !cxled->dpa_res || !resource_size(cxled->dpa_res)) return 0; if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start) return 0; - dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, - dev_name(&cxled->cxld.region->dev)); + /* + * Stop the region search (return 1) when an endpoint mapping is + * found. The region may not be fully constructed so offering + * the cxlr in the context structure is not guaranteed. + */ + cxlr = cxled->cxld.region; + if (cxlr) + dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa, + dev_name(&cxlr->dev)); + else + dev_dbg(dev, "dpa:0x%llx mapped in endpoint:%s\n", dpa, + dev_name(dev)); - ctx->cxlr = cxled->cxld.region; + ctx->cxlr = cxlr; return 1; } @@ -2847,7 +2940,7 @@ static int cxl_pmem_region_alloc(struct cxl_region *cxlr) * bridge for one device is the same for all. 
 */
 	if (i == 0) {
-		cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+		cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
 		if (!cxl_nvb)
 			return -ENODEV;
 		cxlr->cxl_nvb = cxl_nvb;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 603c0120cff80..a6613a6f89237 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -47,6 +47,8 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
 #define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
 #define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
 #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
 #define   CXL_HDM_DECODER_ENABLE BIT(1)
 #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
@@ -855,8 +857,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
 struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm_bridge(struct device *dev);
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
 
 #ifdef CONFIG_CXL_REGION
 bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 19aba81cdf132..af8169ccdbc05 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,9 +395,9 @@ enum cxl_devtype {
 
 /**
  * struct cxl_dpa_perf - DPA performance property entry
- * @dpa_range - range for DPA address
- * @coord - QoS performance data (i.e. latency, bandwidth)
- * @qos_class - QoS Class cookies
+ * @dpa_range: range for DPA address
+ * @coord: QoS performance data (i.e. latency, bandwidth)
+ * @qos_class: QoS Class cookies
  */
 struct cxl_dpa_perf {
 	struct range dpa_range;
@@ -464,13 +464,14 @@ struct cxl_dev_state {
  * @active_persistent_bytes: sum of hard + soft persistent
  * @next_volatile_bytes: volatile capacity change pending device reset
  * @next_persistent_bytes: persistent capacity change pending device reset
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
  * @event: event log driver state
 * @poison: poison driver state info
 * @security: security driver state info
 * @fw: firmware upload / activation state
+ * @mbox_wait: RCU wait for mbox send to complete
 * @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
 *
 * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
 * details on capacity parameters.
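As a side note to the decoder checks introduced above: for power-of-2 interleave ways (eiw < 8), check_interleave_cap() requires the decoder's interleave_mask to cover the HPA bits HPAOFFSET[eiw + eig + 7 : eig + 8]. The following standalone C sketch works one case of that bit math; the iw and ig values are assumptions chosen purely for illustration, not values taken from the patches.

    #include <stdio.h>

    /* Integer log2 of v, assuming v is a power of 2. */
    static unsigned int ilog2u(unsigned int v)
    {
            unsigned int r = 0;

            while (v >>= 1)
                    r++;
            return r;
    }

    int main(void)
    {
            unsigned int iw = 4;                    /* interleave ways */
            unsigned int ig = 256;                  /* granularity in bytes */
            unsigned int eiw = ilog2u(iw);          /* encoded ways: 2 */
            unsigned int eig = ilog2u(ig) - 8;      /* encoded granularity: 0 */
            unsigned int high_pos = eiw + eig + 7;  /* 9 */
            unsigned int low_pos = eig + 8;         /* 8 */
            unsigned long mask = ((1UL << (high_pos - low_pos + 1)) - 1) << low_pos;

            /* Prints: interleave bits HPA[9:8], mask 0x300 */
            printf("interleave bits HPA[%u:%u], mask %#lx\n",
                   high_pos, low_pos, mask);
            return 0;
    }

A 4-way, 256-byte-granularity region therefore needs HPA bits 9:8 present in the decoder's interleave_mask, matching the GENMASK(high_pos, low_pos) test in check_interleave_cap(). The 3-, 6-, 12- and 16-way encodings are additionally gated through iw_cap_mask via the initial test_bit(iw, ...) check, derived from the CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY and CXL_HDM_DECODER_INTERLEAVE_16_WAY capability bits added above.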
@@ -851,11 +852,21 @@ static inline void cxl_mem_active_dec(void) int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd); +/** + * struct cxl_hdm - HDM Decoder registers and cached / decoded capabilities + * @regs: mapped registers, see devm_cxl_setup_hdm() + * @decoder_count: number of decoders for this port + * @target_count: for switch decoders, max downstream port targets + * @interleave_mask: interleave granularity capability, see check_interleave_cap() + * @iw_cap_mask: bitmask of supported interleave ways, see check_interleave_cap() + * @port: mapped cxl_port, see devm_cxl_setup_hdm() + */ struct cxl_hdm { struct cxl_component_regs regs; unsigned int decoder_count; unsigned int target_count; unsigned int interleave_mask; + unsigned long iw_cap_mask; struct cxl_port *port; }; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 0c79d9ce877cc..2f1b49bfe162f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } + if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + rc = devm_cxl_add_nvdimm(parent_port, cxlmd); + if (rc) { + if (rc == -ENODEV) + dev_info(dev, "PMEM disabled by platform\n"); + return rc; + } + } + if (dport->rch) endpoint_parent = parent_port->uport_dev; else @@ -174,14 +183,6 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(cxlmd); - if (rc == -ENODEV) - dev_info(dev, "PMEM disabled by platform\n"); - else - return rc; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 0393a9bba3a8a..c788c38dc671e 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -136,10 +136,6 @@ struct dma_fence *dma_fence_get_stub(void) &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); - - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &dma_fence_stub.flags); - dma_fence_signal_locked(&dma_fence_stub); } spin_unlock(&dma_fence_stub_lock); @@ -167,9 +163,6 @@ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) &dma_fence_stub_lock, 0, 0); - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &fence->flags); - dma_fence_signal_timestamp(fence, timestamp); return fence; @@ -509,8 +502,6 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) __dma_fence_might_wait(); - dma_fence_enable_sw_signaling(fence); - trace_dma_fence_wait_start(fence); if (fence->ops->wait) ret = fence->ops->wait(fence, intr, timeout); @@ -612,6 +603,9 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return; + spin_lock_irqsave(fence->lock, flags); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); @@ -764,16 +758,19 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) unsigned long flags; signed long ret = timeout ? 
timeout : 1; - spin_lock_irqsave(fence->lock, flags); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - goto out; + return ret; + + spin_lock_irqsave(fence->lock, flags); if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; } + if (!__dma_fence_enable_signaling(fence)) + goto out; + if (!timeout) { ret = 0; goto out; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5f8d010516f07..0645c83e42b02 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -705,6 +705,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * May be called without holding the dma_resv lock. Sets @deadline on * all fences filtered by @usage. */ +#ifdef HAVE_DMA_FENCE_OPS_SET_DEADLINE void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage, ktime_t deadline) { @@ -718,6 +719,7 @@ void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage, dma_resv_iter_end(&cursor); } EXPORT_SYMBOL_GPL(dma_resv_set_deadline); +#endif /** * dma_resv_test_signaled - Test if a reservation object's fences have been diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index ed4b323886e43..b08c90ebef95d 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -145,8 +145,6 @@ static int fence_chains_init(struct fence_chains *fc, unsigned int count, } fc->tail = fc->chains[i]; - - dma_fence_enable_sw_signaling(fc->chains[i]); } fc->chain_length = i; diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index f0cee984b6c74..4105d5ea8ddeb 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -102,8 +102,6 @@ static int sanitycheck(void *arg) if (!f) return -ENOMEM; - dma_fence_enable_sw_signaling(f); - array = mock_array(1, f); if (!array) return -ENOMEM; @@ -126,16 +124,12 @@ static int unwrap_array(void *arg) if (!f1) return -ENOMEM; - dma_fence_enable_sw_signaling(f1); - f2 = mock_fence(); if (!f2) { dma_fence_put(f1); return -ENOMEM; } - dma_fence_enable_sw_signaling(f2); - array = mock_array(2, f1, f2); if (!array) return -ENOMEM; @@ -170,16 +164,12 @@ static int unwrap_chain(void *arg) if (!f1) return -ENOMEM; - dma_fence_enable_sw_signaling(f1); - f2 = mock_fence(); if (!f2) { dma_fence_put(f1); return -ENOMEM; } - dma_fence_enable_sw_signaling(f2); - chain = mock_chain(f1, f2); if (!chain) return -ENOMEM; @@ -214,16 +204,12 @@ static int unwrap_chain_array(void *arg) if (!f1) return -ENOMEM; - dma_fence_enable_sw_signaling(f1); - f2 = mock_fence(); if (!f2) { dma_fence_put(f1); return -ENOMEM; } - dma_fence_enable_sw_signaling(f2); - array = mock_array(2, f1, f2); if (!array) return -ENOMEM; @@ -262,16 +248,12 @@ static int unwrap_merge(void *arg) if (!f1) return -ENOMEM; - dma_fence_enable_sw_signaling(f1); - f2 = mock_fence(); if (!f2) { err = -ENOMEM; goto error_put_f1; } - dma_fence_enable_sw_signaling(f2); - f3 = dma_fence_unwrap_merge(f1, f2); if (!f3) { err = -ENOMEM; @@ -314,14 +296,10 @@ static int unwrap_merge_complex(void *arg) if (!f1) return -ENOMEM; - dma_fence_enable_sw_signaling(f1); - f2 = mock_fence(); if (!f2) goto error_put_f1; - dma_fence_enable_sw_signaling(f2); - f3 = dma_fence_unwrap_merge(f1, f2); if (!f3) goto error_put_f2; diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index 6a1bfcd0cc210..d5c106e0b3d0a 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -102,8 +102,6 @@ static int 
sanitycheck(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_fence_signal(f);
 	dma_fence_put(f);
 
@@ -119,8 +117,6 @@ static int test_signaling(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	if (dma_fence_is_signaled(f)) {
 		pr_err("Fence unexpectedly signaled on creation\n");
 		goto err_free;
@@ -194,8 +190,6 @@ static int test_late_add_callback(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_fence_signal(f);
 
 	if (!dma_fence_add_callback(f, &cb.cb, simple_callback)) {
@@ -288,8 +282,6 @@ static int test_status(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	if (dma_fence_get_status(f)) {
 		pr_err("Fence unexpectedly has signaled status on creation\n");
 		goto err_free;
@@ -316,8 +308,6 @@ static int test_error(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_fence_set_error(f, -EIO);
 
 	if (dma_fence_get_status(f)) {
@@ -347,8 +337,6 @@ static int test_wait(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	if (dma_fence_wait_timeout(f, false, 0) != -ETIME) {
 		pr_err("Wait reported complete before being signaled\n");
 		goto err_free;
@@ -391,8 +379,6 @@ static int test_wait_timeout(void *arg)
 	if (!wt.f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(wt.f);
-
 	if (dma_fence_wait_timeout(wt.f, false, 1) != -ETIME) {
 		pr_err("Wait reported complete before being signaled\n");
 		goto err_free;
@@ -472,8 +458,6 @@ static int thread_signal_callback(void *arg)
 			break;
 		}
 
-		dma_fence_enable_sw_signaling(f1);
-
 		rcu_assign_pointer(t->fences[t->id], f1);
 		smp_wmb();
 
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index 15dbea1462ed4..813779e3c9be5 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -45,8 +45,6 @@ static int sanitycheck(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_fence_signal(f);
 	dma_fence_put(f);
 
@@ -71,8 +69,6 @@ static int test_signaling(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_resv_init(&resv);
 	r = dma_resv_lock(&resv, NULL);
 	if (r) {
@@ -118,8 +114,6 @@ static int test_for_each(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_resv_init(&resv);
 	r = dma_resv_lock(&resv, NULL);
 	if (r) {
@@ -179,8 +173,6 @@ static int test_for_each_unlocked(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_resv_init(&resv);
 	r = dma_resv_lock(&resv, NULL);
 	if (r) {
@@ -252,8 +244,6 @@ static int test_get_fences(void *arg)
 	if (!f)
 		return -ENOMEM;
 
-	dma_fence_enable_sw_signaling(f);
-
 	dma_resv_init(&resv);
 	r = dma_resv_lock(&resv, NULL);
 	if (r) {
diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
new file mode 100644
index 0000000000000..aae82db542a5e
--- /dev/null
+++ b/drivers/extcon/extcon-arizona.c
@@ -0,0 +1,1816 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * extcon-arizona.c - Extcon driver for Wolfson Arizona devices
+ *
+ * Copyright (C) 2012-2014 Wolfson Microelectronics plc
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/regulator/consumer.h>
+#include <linux/extcon-provider.h>
+
+#include <sound/soc.h>
+
+#include <linux/mfd/arizona/core.h>
+#include <linux/mfd/arizona/pdata.h>
+#include <linux/mfd/arizona/registers.h>
+#include <dt-bindings/mfd/arizona.h>
+
+#define ARIZONA_MAX_MICD_RANGE 8
+
+#define ARIZONA_MICD_CLAMP_MODE_JDL      0x4
+#define ARIZONA_MICD_CLAMP_MODE_JDH      0x5
+#define ARIZONA_MICD_CLAMP_MODE_JDL_GP5H 0x9
+#define ARIZONA_MICD_CLAMP_MODE_JDH_GP5H 0xb
+
+#define ARIZONA_TST_CAP_DEFAULT 0x3
+#define ARIZONA_TST_CAP_CLAMP
0x1 + +#define ARIZONA_HPDET_MAX 10000 + +#define HPDET_DEBOUNCE 500 +#define DEFAULT_MICD_TIMEOUT 2000 + +#define ARIZONA_HPDET_WAIT_COUNT 15 +#define ARIZONA_HPDET_WAIT_DELAY_MS 20 + +#define QUICK_HEADPHONE_MAX_OHM 3 +#define MICROPHONE_MIN_OHM 1257 +#define MICROPHONE_MAX_OHM 30000 + +#define MICD_DBTIME_TWO_READINGS 2 +#define MICD_DBTIME_FOUR_READINGS 4 + +#define MICD_LVL_1_TO_7 (ARIZONA_MICD_LVL_1 | ARIZONA_MICD_LVL_2 | \ + ARIZONA_MICD_LVL_3 | ARIZONA_MICD_LVL_4 | \ + ARIZONA_MICD_LVL_5 | ARIZONA_MICD_LVL_6 | \ + ARIZONA_MICD_LVL_7) + +#define MICD_LVL_0_TO_7 (ARIZONA_MICD_LVL_0 | MICD_LVL_1_TO_7) + +#define MICD_LVL_0_TO_8 (MICD_LVL_0_TO_7 | ARIZONA_MICD_LVL_8) + +struct arizona_extcon_info { + struct device *dev; + struct arizona *arizona; + struct mutex lock; + struct regulator *micvdd; + struct input_dev *input; + + u16 last_jackdet; + + int micd_mode; + const struct arizona_micd_config *micd_modes; + int micd_num_modes; + + const struct arizona_micd_range *micd_ranges; + int num_micd_ranges; + + bool micd_reva; + bool micd_clamp; + + struct delayed_work hpdet_work; + struct delayed_work micd_detect_work; + struct delayed_work micd_timeout_work; + + bool hpdet_active; + bool hpdet_done; + bool hpdet_retried; + + int num_hpdet_res; + unsigned int hpdet_res[3]; + + bool mic; + bool detecting; + int jack_flips; + + int hpdet_ip_version; + + struct extcon_dev *edev; + + struct gpio_desc *micd_pol_gpio; +}; + +static const struct arizona_micd_config micd_default_modes[] = { + { ARIZONA_ACCDET_SRC, 1, 0 }, + { 0, 2, 1 }, +}; + +static const struct arizona_micd_range micd_default_ranges[] = { + { .max = 11, .key = BTN_0 }, + { .max = 28, .key = BTN_1 }, + { .max = 54, .key = BTN_2 }, + { .max = 100, .key = BTN_3 }, + { .max = 186, .key = BTN_4 }, + { .max = 430, .key = BTN_5 }, +}; + +/* The number of levels in arizona_micd_levels valid for button thresholds */ +#define ARIZONA_NUM_MICD_BUTTON_LEVELS 64 + +static const int arizona_micd_levels[] = { + 3, 6, 8, 11, 13, 16, 18, 21, 23, 26, 28, 31, 34, 36, 39, 41, 44, 46, + 49, 52, 54, 57, 60, 62, 65, 67, 70, 73, 75, 78, 81, 83, 89, 94, 100, + 105, 111, 116, 122, 127, 139, 150, 161, 173, 186, 196, 209, 220, 245, + 270, 295, 321, 348, 375, 402, 430, 489, 550, 614, 681, 752, 903, 1071, + 1257, 30000, +}; + +static const unsigned int arizona_cable[] = { + EXTCON_MECHANICAL, + EXTCON_JACK_MICROPHONE, + EXTCON_JACK_HEADPHONE, + EXTCON_JACK_LINE_OUT, + EXTCON_NONE, +}; + +static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info); + +static void arizona_extcon_hp_clamp(struct arizona_extcon_info *info, + bool clamp) +{ + struct arizona *arizona = info->arizona; + unsigned int mask = 0, val = 0; + unsigned int cap_sel = 0; + int ret; + + switch (arizona->type) { + case WM8998: + case WM1814: + mask = 0; + break; + case WM5110: + case WM8280: + mask = ARIZONA_HP1L_SHRTO | ARIZONA_HP1L_FLWR | + ARIZONA_HP1L_SHRTI; + if (clamp) { + val = ARIZONA_HP1L_SHRTO; + cap_sel = ARIZONA_TST_CAP_CLAMP; + } else { + val = ARIZONA_HP1L_FLWR | ARIZONA_HP1L_SHRTI; + cap_sel = ARIZONA_TST_CAP_DEFAULT; + } + + ret = regmap_update_bits(arizona->regmap, + ARIZONA_HP_TEST_CTRL_1, + ARIZONA_HP1_TST_CAP_SEL_MASK, + cap_sel); + if (ret != 0) + dev_warn(arizona->dev, + "Failed to set TST_CAP_SEL: %d\n", ret); + break; + default: + mask = ARIZONA_RMV_SHRT_HP1L; + if (clamp) + val = ARIZONA_RMV_SHRT_HP1L; + break; + } + + snd_soc_dapm_mutex_lock(arizona->dapm); + + arizona->hpdet_clamp = clamp; + + /* Keep the HP output stages disabled while doing the clamp 
*/ + if (clamp) { + ret = regmap_update_bits(arizona->regmap, + ARIZONA_OUTPUT_ENABLES_1, + ARIZONA_OUT1L_ENA | + ARIZONA_OUT1R_ENA, 0); + if (ret != 0) + dev_warn(arizona->dev, + "Failed to disable headphone outputs: %d\n", + ret); + } + + if (mask) { + ret = regmap_update_bits(arizona->regmap, ARIZONA_HP_CTRL_1L, + mask, val); + if (ret != 0) + dev_warn(arizona->dev, "Failed to do clamp: %d\n", + ret); + + ret = regmap_update_bits(arizona->regmap, ARIZONA_HP_CTRL_1R, + mask, val); + if (ret != 0) + dev_warn(arizona->dev, "Failed to do clamp: %d\n", + ret); + } + + /* Restore the desired state while not doing the clamp */ + if (!clamp) { + ret = regmap_update_bits(arizona->regmap, + ARIZONA_OUTPUT_ENABLES_1, + ARIZONA_OUT1L_ENA | + ARIZONA_OUT1R_ENA, arizona->hp_ena); + if (ret != 0) + dev_warn(arizona->dev, + "Failed to restore headphone outputs: %d\n", + ret); + } + + snd_soc_dapm_mutex_unlock(arizona->dapm); +} + +static void arizona_extcon_set_mode(struct arizona_extcon_info *info, int mode) +{ + struct arizona *arizona = info->arizona; + + mode %= info->micd_num_modes; + + gpiod_set_value_cansleep(info->micd_pol_gpio, + info->micd_modes[mode].gpio); + + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_BIAS_SRC_MASK, + info->micd_modes[mode].bias << + ARIZONA_MICD_BIAS_SRC_SHIFT); + regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1, + ARIZONA_ACCDET_SRC, info->micd_modes[mode].src); + + info->micd_mode = mode; + + dev_dbg(arizona->dev, "Set jack polarity to %d\n", mode); +} + +static const char *arizona_extcon_get_micbias(struct arizona_extcon_info *info) +{ + switch (info->micd_modes[0].bias) { + case 1: + return "MICBIAS1"; + case 2: + return "MICBIAS2"; + case 3: + return "MICBIAS3"; + default: + return "MICVDD"; + } +} + +static void arizona_extcon_pulse_micbias(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + const char *widget = arizona_extcon_get_micbias(info); + struct snd_soc_dapm_context *dapm = arizona->dapm; + struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); + int ret; + + ret = snd_soc_component_force_enable_pin(component, widget); + if (ret != 0) + dev_warn(arizona->dev, "Failed to enable %s: %d\n", + widget, ret); + + snd_soc_dapm_sync(dapm); + + if (!arizona->pdata.micd_force_micbias) { + ret = snd_soc_component_disable_pin(component, widget); + if (ret != 0) + dev_warn(arizona->dev, "Failed to disable %s: %d\n", + widget, ret); + + snd_soc_dapm_sync(dapm); + } +} + +static void arizona_start_mic(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + bool change; + int ret; + unsigned int mode; + + /* Microphone detection can't use idle mode */ + pm_runtime_get(info->dev); + + if (info->detecting) { + ret = regulator_allow_bypass(info->micvdd, false); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to regulate MICVDD: %d\n", + ret); + } + } + + ret = regulator_enable(info->micvdd); + if (ret != 0) { + dev_err(arizona->dev, "Failed to enable MICVDD: %d\n", + ret); + } + + if (info->micd_reva) { + const struct reg_sequence reva[] = { + { 0x80, 0x3 }, + { 0x294, 0x0 }, + { 0x80, 0x0 }, + }; + + regmap_multi_reg_write(arizona->regmap, reva, ARRAY_SIZE(reva)); + } + + if (info->detecting && arizona->pdata.micd_software_compare) + mode = ARIZONA_ACCDET_MODE_ADC; + else + mode = ARIZONA_ACCDET_MODE_MIC; + + regmap_update_bits(arizona->regmap, + ARIZONA_ACCESSORY_DETECT_MODE_1, + ARIZONA_ACCDET_MODE_MASK, mode); + + arizona_extcon_pulse_micbias(info); 
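+	/*
+	 * regmap_update_bits_check() reports via 'change' whether MICD_ENA
+	 * was actually toggled on here; if the bit was already set no new
+	 * detection cycle starts, so the regulator and runtime-PM
+	 * references taken above are dropped again to keep the counts
+	 * balanced.
+	 */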
+ + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_ENA, ARIZONA_MICD_ENA, + &change); + if (ret < 0) { + dev_err(arizona->dev, "Failed to enable micd: %d\n", ret); + } else if (!change) { + regulator_disable(info->micvdd); + pm_runtime_put_autosuspend(info->dev); + } +} + +static void arizona_stop_mic(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + const char *widget = arizona_extcon_get_micbias(info); + struct snd_soc_dapm_context *dapm = arizona->dapm; + struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); + bool change = false; + int ret; + + ret = regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_ENA, 0, + &change); + if (ret < 0) + dev_err(arizona->dev, "Failed to disable micd: %d\n", ret); + + ret = snd_soc_component_disable_pin(component, widget); + if (ret != 0) + dev_warn(arizona->dev, + "Failed to disable %s: %d\n", + widget, ret); + + snd_soc_dapm_sync(dapm); + + if (info->micd_reva) { + const struct reg_sequence reva[] = { + { 0x80, 0x3 }, + { 0x294, 0x2 }, + { 0x80, 0x0 }, + }; + + regmap_multi_reg_write(arizona->regmap, reva, ARRAY_SIZE(reva)); + } + + ret = regulator_allow_bypass(info->micvdd, true); + if (ret != 0) { + dev_err(arizona->dev, "Failed to bypass MICVDD: %d\n", + ret); + } + + if (change) { + regulator_disable(info->micvdd); + pm_runtime_mark_last_busy(info->dev); + pm_runtime_put_autosuspend(info->dev); + } +} + +static struct { + unsigned int threshold; + unsigned int factor_a; + unsigned int factor_b; +} arizona_hpdet_b_ranges[] = { + { 100, 5528, 362464 }, + { 169, 11084, 6186851 }, + { 169, 11065, 65460395 }, +}; + +#define ARIZONA_HPDET_B_RANGE_MAX 0x3fb + +static struct { + int min; + int max; +} arizona_hpdet_c_ranges[] = { + { 0, 30 }, + { 8, 100 }, + { 100, 1000 }, + { 1000, 10000 }, +}; + +static int arizona_hpdet_read(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + unsigned int val, range; + int ret; + + ret = regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_2, &val); + if (ret != 0) { + dev_err(arizona->dev, "Failed to read HPDET status: %d\n", + ret); + return ret; + } + + switch (info->hpdet_ip_version) { + case 0: + if (!(val & ARIZONA_HP_DONE)) { + dev_err(arizona->dev, "HPDET did not complete: %x\n", + val); + return -EAGAIN; + } + + val &= ARIZONA_HP_LVL_MASK; + break; + + case 1: + if (!(val & ARIZONA_HP_DONE_B)) { + dev_err(arizona->dev, "HPDET did not complete: %x\n", + val); + return -EAGAIN; + } + + ret = regmap_read(arizona->regmap, ARIZONA_HP_DACVAL, &val); + if (ret != 0) { + dev_err(arizona->dev, "Failed to read HP value: %d\n", + ret); + return -EAGAIN; + } + + regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1, + &range); + range = (range & ARIZONA_HP_IMPEDANCE_RANGE_MASK) + >> ARIZONA_HP_IMPEDANCE_RANGE_SHIFT; + + if (range < ARRAY_SIZE(arizona_hpdet_b_ranges) - 1 && + (val < arizona_hpdet_b_ranges[range].threshold || + val >= ARIZONA_HPDET_B_RANGE_MAX)) { + range++; + dev_dbg(arizona->dev, "Moving to HPDET range %d\n", + range); + regmap_update_bits(arizona->regmap, + ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_IMPEDANCE_RANGE_MASK, + range << + ARIZONA_HP_IMPEDANCE_RANGE_SHIFT); + return -EAGAIN; + } + + /* If we go out of range report top of range */ + if (val < arizona_hpdet_b_ranges[range].threshold || + val >= ARIZONA_HPDET_B_RANGE_MAX) { + dev_dbg(arizona->dev, "Measurement out of range\n"); + return ARIZONA_HPDET_MAX; + } + + dev_dbg(arizona->dev, "HPDET read %d in 
range %d\n", + val, range); + + val = arizona_hpdet_b_ranges[range].factor_b + / ((val * 100) - + arizona_hpdet_b_ranges[range].factor_a); + break; + + case 2: + if (!(val & ARIZONA_HP_DONE_B)) { + dev_err(arizona->dev, "HPDET did not complete: %x\n", + val); + return -EAGAIN; + } + + val &= ARIZONA_HP_LVL_B_MASK; + /* Convert to ohms, the value is in 0.5 ohm increments */ + val /= 2; + + regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1, + &range); + range = (range & ARIZONA_HP_IMPEDANCE_RANGE_MASK) + >> ARIZONA_HP_IMPEDANCE_RANGE_SHIFT; + + /* Skip up a range, or report? */ + if (range < ARRAY_SIZE(arizona_hpdet_c_ranges) - 1 && + (val >= arizona_hpdet_c_ranges[range].max)) { + range++; + dev_dbg(arizona->dev, "Moving to HPDET range %d-%d\n", + arizona_hpdet_c_ranges[range].min, + arizona_hpdet_c_ranges[range].max); + regmap_update_bits(arizona->regmap, + ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_IMPEDANCE_RANGE_MASK, + range << + ARIZONA_HP_IMPEDANCE_RANGE_SHIFT); + return -EAGAIN; + } + + if (range && (val < arizona_hpdet_c_ranges[range].min)) { + dev_dbg(arizona->dev, "Reporting range boundary %d\n", + arizona_hpdet_c_ranges[range].min); + val = arizona_hpdet_c_ranges[range].min; + } + break; + + default: + dev_warn(arizona->dev, "Unknown HPDET IP revision %d\n", + info->hpdet_ip_version); + return -EINVAL; + } + + dev_dbg(arizona->dev, "HP impedance %d ohms\n", val); + return val; +} + +static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading, + bool *mic) +{ + struct arizona *arizona = info->arizona; + int id_gpio = arizona->pdata.hpdet_id_gpio; + + if (!arizona->pdata.hpdet_acc_id) + return 0; + + /* + * If we're using HPDET for accessory identification we need + * to take multiple measurements, step through them in sequence. + */ + info->hpdet_res[info->num_hpdet_res++] = *reading; + + /* Only check the mic directly if we didn't already ID it */ + if (id_gpio && info->num_hpdet_res == 1) { + dev_dbg(arizona->dev, "Measuring mic\n"); + + regmap_update_bits(arizona->regmap, + ARIZONA_ACCESSORY_DETECT_MODE_1, + ARIZONA_ACCDET_MODE_MASK | + ARIZONA_ACCDET_SRC, + ARIZONA_ACCDET_MODE_HPR | + info->micd_modes[0].src); + + gpio_set_value_cansleep(id_gpio, 1); + + regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_POLL, ARIZONA_HP_POLL); + return -EAGAIN; + } + + /* OK, got both. Now, compare... 
*/
+	dev_dbg(arizona->dev, "HPDET measured %d %d\n",
+		info->hpdet_res[0], info->hpdet_res[1]);
+
+	/* Take the headphone impedance for the main report */
+	*reading = info->hpdet_res[0];
+
+	/* Sometimes we get false readings due to slow insert */
+	if (*reading >= ARIZONA_HPDET_MAX && !info->hpdet_retried) {
+		dev_dbg(arizona->dev, "Retrying high impedance\n");
+		info->num_hpdet_res = 0;
+		info->hpdet_retried = true;
+		arizona_start_hpdet_acc_id(info);
+		pm_runtime_put(info->dev);
+		return -EAGAIN;
+	}
+
+	/*
+	 * If the mic measures as high impedance (or there is no ID GPIO
+	 * to check it with), the accessory has a microphone.
+	 */
+	if (!id_gpio || info->hpdet_res[1] > 50) {
+		dev_dbg(arizona->dev, "Detected mic\n");
+		*mic = true;
+		info->detecting = true;
+	} else {
+		dev_dbg(arizona->dev, "Detected headphone\n");
+	}
+
+	/* Make sure everything is reset back to the real polarity */
+	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_SRC, info->micd_modes[0].src);
+
+	return 0;
+}
+
+static irqreturn_t arizona_hpdet_irq(int irq, void *data)
+{
+	struct arizona_extcon_info *info = data;
+	struct arizona *arizona = info->arizona;
+	int id_gpio = arizona->pdata.hpdet_id_gpio;
+	unsigned int report = EXTCON_JACK_HEADPHONE;
+	int ret, reading;
+	bool mic = false;
+
+	mutex_lock(&info->lock);
+
+	/* If we got a spurious IRQ for some reason then ignore it */
+	if (!info->hpdet_active) {
+		dev_warn(arizona->dev, "Spurious HPDET IRQ\n");
+		mutex_unlock(&info->lock);
+		return IRQ_NONE;
+	}
+
+	/* If the cable was removed while measuring ignore the result */
+	ret = extcon_get_state(info->edev, EXTCON_MECHANICAL);
+	if (ret < 0) {
+		dev_err(arizona->dev, "Failed to check cable state: %d\n",
+			ret);
+		goto out;
+	} else if (!ret) {
+		dev_dbg(arizona->dev, "Ignoring HPDET for removed cable\n");
+		goto done;
+	}
+
+	ret = arizona_hpdet_read(info);
+	if (ret == -EAGAIN)
+		goto out;
+	else if (ret < 0)
+		goto done;
+	reading = ret;
+
+	/* Reset back to starting range */
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_HEADPHONE_DETECT_1,
+			   ARIZONA_HP_IMPEDANCE_RANGE_MASK | ARIZONA_HP_POLL,
+			   0);
+
+	ret = arizona_hpdet_do_id(info, &reading, &mic);
+	if (ret == -EAGAIN)
+		goto out;
+	else if (ret < 0)
+		goto done;
+
+	/* Report high impedance cables as line outputs */
+	if (reading >= 5000)
+		report = EXTCON_JACK_LINE_OUT;
+	else
+		report = EXTCON_JACK_HEADPHONE;
+
+	ret = extcon_set_state_sync(info->edev, report, true);
+	if (ret != 0)
+		dev_err(arizona->dev, "Failed to report HP/line: %d\n",
+			ret);
+
+done:
+	/* Reset back to starting range */
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_HEADPHONE_DETECT_1,
+			   ARIZONA_HP_IMPEDANCE_RANGE_MASK | ARIZONA_HP_POLL,
+			   0);
+
+	arizona_extcon_hp_clamp(info, false);
+
+	if (id_gpio)
+		gpio_set_value_cansleep(id_gpio, 0);
+
+	/* If we have a mic then re-enable MICDET */
+	if (mic || info->mic)
+		arizona_start_mic(info);
+
+	if (info->hpdet_active) {
+		pm_runtime_put_autosuspend(info->dev);
+		info->hpdet_active = false;
+	}
+
+	info->hpdet_done = true;
+
+out:
+	mutex_unlock(&info->lock);
+
+	return IRQ_HANDLED;
+}
+
+static void arizona_identify_headphone(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	int ret;
+
+	if (info->hpdet_done)
+		return;
+
+	dev_dbg(arizona->dev, "Starting HPDET\n");
+
+	/* Make sure we keep the device enabled during the measurement */
+	pm_runtime_get(info->dev);
+
+	info->hpdet_active = true;
+
+	arizona_stop_mic(info);
+
+	arizona_extcon_hp_clamp(info, true);
+
+	ret = regmap_update_bits(arizona->regmap,
+
ARIZONA_ACCESSORY_DETECT_MODE_1, + ARIZONA_ACCDET_MODE_MASK, + arizona->pdata.hpdet_channel); + if (ret != 0) { + dev_err(arizona->dev, "Failed to set HPDET mode: %d\n", ret); + goto err; + } + + ret = regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_POLL, ARIZONA_HP_POLL); + if (ret != 0) { + dev_err(arizona->dev, "Can't start HPDETL measurement: %d\n", + ret); + goto err; + } + + return; + +err: + arizona_extcon_hp_clamp(info, false); + pm_runtime_put_autosuspend(info->dev); + + /* Just report headphone */ + ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true); + if (ret != 0) + dev_err(arizona->dev, "Failed to report headphone: %d\n", ret); + + if (info->mic) + arizona_start_mic(info); + + info->hpdet_active = false; +} + +static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + int hp_reading = 32; + bool mic; + int ret; + + dev_dbg(arizona->dev, "Starting identification via HPDET\n"); + + /* Make sure we keep the device enabled during the measurement */ + pm_runtime_get_sync(info->dev); + + info->hpdet_active = true; + + arizona_extcon_hp_clamp(info, true); + + ret = regmap_update_bits(arizona->regmap, + ARIZONA_ACCESSORY_DETECT_MODE_1, + ARIZONA_ACCDET_SRC | ARIZONA_ACCDET_MODE_MASK, + info->micd_modes[0].src | + arizona->pdata.hpdet_channel); + if (ret != 0) { + dev_err(arizona->dev, "Failed to set HPDET mode: %d\n", ret); + goto err; + } + + if (arizona->pdata.hpdet_acc_id_line) { + ret = regmap_update_bits(arizona->regmap, + ARIZONA_HEADPHONE_DETECT_1, + ARIZONA_HP_POLL, ARIZONA_HP_POLL); + if (ret != 0) { + dev_err(arizona->dev, + "Can't start HPDETL measurement: %d\n", + ret); + goto err; + } + } else { + arizona_hpdet_do_id(info, &hp_reading, &mic); + } + + return; + +err: + /* Just report headphone */ + ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true); + if (ret != 0) + dev_err(arizona->dev, "Failed to report headphone: %d\n", ret); + + info->hpdet_active = false; +} + +static void arizona_micd_timeout_work(struct work_struct *work) +{ + struct arizona_extcon_info *info = container_of(work, + struct arizona_extcon_info, + micd_timeout_work.work); + + mutex_lock(&info->lock); + + dev_dbg(info->arizona->dev, "MICD timed out, reporting HP\n"); + + info->detecting = false; + + arizona_identify_headphone(info); + + mutex_unlock(&info->lock); +} + +static int arizona_micd_adc_read(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + unsigned int val; + int ret; + + /* Must disable MICD before we read the ADCVAL */ + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_ENA, 0); + + ret = regmap_read(arizona->regmap, ARIZONA_MIC_DETECT_4, &val); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to read MICDET_ADCVAL: %d\n", ret); + return ret; + } + + dev_dbg(arizona->dev, "MICDET_ADCVAL: %x\n", val); + + val &= ARIZONA_MICDET_ADCVAL_MASK; + if (val < ARRAY_SIZE(arizona_micd_levels)) + val = arizona_micd_levels[val]; + else + val = INT_MAX; + + if (val <= QUICK_HEADPHONE_MAX_OHM) + val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_0; + else if (val <= MICROPHONE_MIN_OHM) + val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_1; + else if (val <= MICROPHONE_MAX_OHM) + val = ARIZONA_MICD_STS | ARIZONA_MICD_LVL_8; + else + val = ARIZONA_MICD_LVL_8; + + return val; +} + +static int arizona_micd_read(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + unsigned int val = 0; + int ret, i; + + for (i = 0; 
i < 10 && !(val & MICD_LVL_0_TO_8); i++) {
+		ret = regmap_read(arizona->regmap, ARIZONA_MIC_DETECT_3, &val);
+		if (ret != 0) {
+			dev_err(arizona->dev,
+				"Failed to read MICDET: %d\n", ret);
+			return ret;
+		}
+
+		dev_dbg(arizona->dev, "MICDET: %x\n", val);
+
+		if (!(val & ARIZONA_MICD_VALID)) {
+			dev_warn(arizona->dev,
+				 "Microphone detection state invalid\n");
+			return -EINVAL;
+		}
+	}
+
+	if (i == 10 && !(val & MICD_LVL_0_TO_8)) {
+		dev_err(arizona->dev, "Failed to get valid MICDET value\n");
+		return -EINVAL;
+	}
+
+	return val;
+}
+
+static int arizona_micdet_reading(void *priv)
+{
+	struct arizona_extcon_info *info = priv;
+	struct arizona *arizona = info->arizona;
+	int ret, val;
+
+	if (info->detecting && arizona->pdata.micd_software_compare)
+		ret = arizona_micd_adc_read(info);
+	else
+		ret = arizona_micd_read(info);
+	if (ret < 0)
+		return ret;
+
+	val = ret;
+
+	/* Due to jack detect this should never happen */
+	if (!(val & ARIZONA_MICD_STS)) {
+		dev_warn(arizona->dev, "Detected open circuit\n");
+		info->mic = false;
+		info->detecting = false;
+		arizona_identify_headphone(info);
+		return 0;
+	}
+
+	/* If we got a high impedance we should have a headset, report it. */
+	if (val & ARIZONA_MICD_LVL_8) {
+		info->mic = true;
+		info->detecting = false;
+
+		arizona_identify_headphone(info);
+
+		ret = extcon_set_state_sync(info->edev,
+					    EXTCON_JACK_MICROPHONE, true);
+		if (ret != 0)
+			dev_err(arizona->dev, "Headset report failed: %d\n",
+				ret);
+
+		/* Don't need to regulate for button detection */
+		ret = regulator_allow_bypass(info->micvdd, true);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to bypass MICVDD: %d\n",
+				ret);
+		}
+
+		return 0;
+	}
+
+	/* If we detected a lower impedance during initial startup
+	 * then we probably have the wrong polarity, flip it. Don't
+	 * do this for the lowest impedances to speed up detection of
+	 * plain headphones. If both polarities report a low
+	 * impedance then give up and report headphones.
+	 */
+	if (val & MICD_LVL_1_TO_7) {
+		if (info->jack_flips >= info->micd_num_modes * 10) {
+			dev_dbg(arizona->dev, "Detected HP/line\n");
+
+			info->detecting = false;
+
+			arizona_identify_headphone(info);
+		} else {
+			info->micd_mode++;
+			if (info->micd_mode == info->micd_num_modes)
+				info->micd_mode = 0;
+			arizona_extcon_set_mode(info, info->micd_mode);
+
+			info->jack_flips++;
+
+			if (arizona->pdata.micd_software_compare)
+				regmap_update_bits(arizona->regmap,
+						   ARIZONA_MIC_DETECT_1,
+						   ARIZONA_MICD_ENA,
+						   ARIZONA_MICD_ENA);
+
+			queue_delayed_work(system_power_efficient_wq,
+					   &info->micd_timeout_work,
+					   msecs_to_jiffies(arizona->pdata.micd_timeout));
+		}
+
+		return 0;
+	}
+
+	/*
+	 * If we're still detecting and we detect a short then we've
+	 * got a headphone.
+	 */
+	dev_dbg(arizona->dev, "Headphone detected\n");
+	info->detecting = false;
+
+	arizona_identify_headphone(info);
+
+	return 0;
+}
+
+static int arizona_button_reading(void *priv)
+{
+	struct arizona_extcon_info *info = priv;
+	struct arizona *arizona = info->arizona;
+	int val, key, lvl, i;
+
+	val = arizona_micd_read(info);
+	if (val < 0)
+		return val;
+
+	/*
+	 * If we're still detecting and we detect a short then we've
+	 * got a headphone. Otherwise it's a button press.
+ */ + if (val & MICD_LVL_0_TO_7) { + if (info->mic) { + dev_dbg(arizona->dev, "Mic button detected\n"); + + lvl = val & ARIZONA_MICD_LVL_MASK; + lvl >>= ARIZONA_MICD_LVL_SHIFT; + + for (i = 0; i < info->num_micd_ranges; i++) + input_report_key(info->input, + info->micd_ranges[i].key, 0); + + if (lvl && ffs(lvl) - 1 < info->num_micd_ranges) { + key = info->micd_ranges[ffs(lvl) - 1].key; + input_report_key(info->input, key, 1); + input_sync(info->input); + } else { + dev_err(arizona->dev, "Button out of range\n"); + } + } else { + dev_warn(arizona->dev, "Button with no mic: %x\n", + val); + } + } else { + dev_dbg(arizona->dev, "Mic button released\n"); + for (i = 0; i < info->num_micd_ranges; i++) + input_report_key(info->input, + info->micd_ranges[i].key, 0); + input_sync(info->input); + arizona_extcon_pulse_micbias(info); + } + + return 0; +} + +static void arizona_micd_detect(struct work_struct *work) +{ + struct arizona_extcon_info *info = container_of(work, + struct arizona_extcon_info, + micd_detect_work.work); + struct arizona *arizona = info->arizona; + int ret; + + cancel_delayed_work_sync(&info->micd_timeout_work); + + mutex_lock(&info->lock); + + /* If the cable was removed while measuring ignore the result */ + ret = extcon_get_state(info->edev, EXTCON_MECHANICAL); + if (ret < 0) { + dev_err(arizona->dev, "Failed to check cable state: %d\n", + ret); + mutex_unlock(&info->lock); + return; + } else if (!ret) { + dev_dbg(arizona->dev, "Ignoring MICDET for removed cable\n"); + mutex_unlock(&info->lock); + return; + } + + if (info->detecting) + arizona_micdet_reading(info); + else + arizona_button_reading(info); + + pm_runtime_mark_last_busy(info->dev); + mutex_unlock(&info->lock); +} + +static irqreturn_t arizona_micdet(int irq, void *data) +{ + struct arizona_extcon_info *info = data; + struct arizona *arizona = info->arizona; + int debounce = arizona->pdata.micd_detect_debounce; + + cancel_delayed_work_sync(&info->micd_detect_work); + cancel_delayed_work_sync(&info->micd_timeout_work); + + mutex_lock(&info->lock); + if (!info->detecting) + debounce = 0; + mutex_unlock(&info->lock); + + if (debounce) + queue_delayed_work(system_power_efficient_wq, + &info->micd_detect_work, + msecs_to_jiffies(debounce)); + else + arizona_micd_detect(&info->micd_detect_work.work); + + return IRQ_HANDLED; +} + +static void arizona_hpdet_work(struct work_struct *work) +{ + struct arizona_extcon_info *info = container_of(work, + struct arizona_extcon_info, + hpdet_work.work); + + mutex_lock(&info->lock); + arizona_start_hpdet_acc_id(info); + mutex_unlock(&info->lock); +} + +static int arizona_hpdet_wait(struct arizona_extcon_info *info) +{ + struct arizona *arizona = info->arizona; + unsigned int val; + int i, ret; + + for (i = 0; i < ARIZONA_HPDET_WAIT_COUNT; i++) { + ret = regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_2, + &val); + if (ret) { + dev_err(arizona->dev, + "Failed to read HPDET state: %d\n", ret); + return ret; + } + + switch (info->hpdet_ip_version) { + case 0: + if (val & ARIZONA_HP_DONE) + return 0; + break; + default: + if (val & ARIZONA_HP_DONE_B) + return 0; + break; + } + + msleep(ARIZONA_HPDET_WAIT_DELAY_MS); + } + + dev_warn(arizona->dev, "HPDET did not appear to complete\n"); + + return -ETIMEDOUT; +} + +static irqreturn_t arizona_jackdet(int irq, void *data) +{ + struct arizona_extcon_info *info = data; + struct arizona *arizona = info->arizona; + unsigned int val, present, mask; + bool cancelled_hp, cancelled_mic; + int ret, i; + + cancelled_hp = 
cancel_delayed_work_sync(&info->hpdet_work); + cancelled_mic = cancel_delayed_work_sync(&info->micd_timeout_work); + + pm_runtime_get_sync(info->dev); + + mutex_lock(&info->lock); + + if (info->micd_clamp) { + mask = ARIZONA_MICD_CLAMP_STS; + present = 0; + } else { + mask = ARIZONA_JD1_STS; + if (arizona->pdata.jd_invert) + present = 0; + else + present = ARIZONA_JD1_STS; + } + + ret = regmap_read(arizona->regmap, ARIZONA_AOD_IRQ_RAW_STATUS, &val); + if (ret != 0) { + dev_err(arizona->dev, "Failed to read jackdet status: %d\n", + ret); + mutex_unlock(&info->lock); + pm_runtime_put_autosuspend(info->dev); + return IRQ_NONE; + } + + val &= mask; + if (val == info->last_jackdet) { + dev_dbg(arizona->dev, "Suppressing duplicate JACKDET\n"); + if (cancelled_hp) + queue_delayed_work(system_power_efficient_wq, + &info->hpdet_work, + msecs_to_jiffies(HPDET_DEBOUNCE)); + + if (cancelled_mic) { + int micd_timeout = arizona->pdata.micd_timeout; + + queue_delayed_work(system_power_efficient_wq, + &info->micd_timeout_work, + msecs_to_jiffies(micd_timeout)); + } + + goto out; + } + info->last_jackdet = val; + + if (info->last_jackdet == present) { + dev_dbg(arizona->dev, "Detected jack\n"); + ret = extcon_set_state_sync(info->edev, + EXTCON_MECHANICAL, true); + + if (ret != 0) + dev_err(arizona->dev, "Mechanical report failed: %d\n", + ret); + + info->detecting = true; + info->mic = false; + info->jack_flips = 0; + + if (!arizona->pdata.hpdet_acc_id) { + arizona_start_mic(info); + } else { + queue_delayed_work(system_power_efficient_wq, + &info->hpdet_work, + msecs_to_jiffies(HPDET_DEBOUNCE)); + } + + if (info->micd_clamp || !arizona->pdata.jd_invert) + regmap_update_bits(arizona->regmap, + ARIZONA_JACK_DETECT_DEBOUNCE, + ARIZONA_MICD_CLAMP_DB | + ARIZONA_JD1_DB, 0); + } else { + dev_dbg(arizona->dev, "Detected jack removal\n"); + + arizona_stop_mic(info); + + info->num_hpdet_res = 0; + for (i = 0; i < ARRAY_SIZE(info->hpdet_res); i++) + info->hpdet_res[i] = 0; + info->mic = false; + info->hpdet_done = false; + info->hpdet_retried = false; + + for (i = 0; i < info->num_micd_ranges; i++) + input_report_key(info->input, + info->micd_ranges[i].key, 0); + input_sync(info->input); + + for (i = 0; i < ARRAY_SIZE(arizona_cable) - 1; i++) { + ret = extcon_set_state_sync(info->edev, + arizona_cable[i], false); + if (ret != 0) + dev_err(arizona->dev, + "Removal report failed: %d\n", ret); + } + + /* + * If the jack was removed during a headphone detection we + * need to wait for the headphone detection to finish, as + * it can not be aborted. We don't want to be able to start + * a new headphone detection from a fresh insert until this + * one is finished. 
+ */ + arizona_hpdet_wait(info); + + regmap_update_bits(arizona->regmap, + ARIZONA_JACK_DETECT_DEBOUNCE, + ARIZONA_MICD_CLAMP_DB | ARIZONA_JD1_DB, + ARIZONA_MICD_CLAMP_DB | ARIZONA_JD1_DB); + } + +out: + /* Clear trig_sts to make sure DCVDD is not forced up */ + regmap_write(arizona->regmap, ARIZONA_AOD_WKUP_AND_TRIG, + ARIZONA_MICD_CLAMP_FALL_TRIG_STS | + ARIZONA_MICD_CLAMP_RISE_TRIG_STS | + ARIZONA_JD1_FALL_TRIG_STS | + ARIZONA_JD1_RISE_TRIG_STS); + + mutex_unlock(&info->lock); + + pm_runtime_mark_last_busy(info->dev); + pm_runtime_put_autosuspend(info->dev); + + return IRQ_HANDLED; +} + +/* Map a level onto a slot in the register bank */ +static void arizona_micd_set_level(struct arizona *arizona, int index, + unsigned int level) +{ + int reg; + unsigned int mask; + + reg = ARIZONA_MIC_DETECT_LEVEL_4 - (index / 2); + + if (!(index % 2)) { + mask = 0x3f00; + level <<= 8; + } else { + mask = 0x3f; + } + + /* Program the level itself */ + regmap_update_bits(arizona->regmap, reg, mask, level); +} + +static int arizona_extcon_get_micd_configs(struct device *dev, + struct arizona *arizona) +{ + const char * const prop = "wlf,micd-configs"; + const int entries_per_config = 3; + struct arizona_micd_config *micd_configs; + int nconfs, ret; + int i, j; + u32 *vals; + + nconfs = device_property_count_u32(arizona->dev, prop); + if (nconfs <= 0) + return 0; + + vals = kcalloc(nconfs, sizeof(u32), GFP_KERNEL); + if (!vals) + return -ENOMEM; + + ret = device_property_read_u32_array(arizona->dev, prop, vals, nconfs); + if (ret < 0) + goto out; + + nconfs /= entries_per_config; + micd_configs = devm_kcalloc(dev, nconfs, sizeof(*micd_configs), + GFP_KERNEL); + if (!micd_configs) { + ret = -ENOMEM; + goto out; + } + + for (i = 0, j = 0; i < nconfs; ++i) { + micd_configs[i].src = vals[j++] ? 
ARIZONA_ACCDET_SRC : 0; + micd_configs[i].bias = vals[j++]; + micd_configs[i].gpio = vals[j++]; + } + + arizona->pdata.micd_configs = micd_configs; + arizona->pdata.num_micd_configs = nconfs; + +out: + kfree(vals); + return ret; +} + +static int arizona_extcon_device_get_pdata(struct device *dev, + struct arizona *arizona) +{ + struct arizona_pdata *pdata = &arizona->pdata; + unsigned int val = ARIZONA_ACCDET_MODE_HPL; + int ret; + + device_property_read_u32(arizona->dev, "wlf,hpdet-channel", &val); + switch (val) { + case ARIZONA_ACCDET_MODE_HPL: + case ARIZONA_ACCDET_MODE_HPR: + pdata->hpdet_channel = val; + break; + default: + dev_err(arizona->dev, + "Wrong wlf,hpdet-channel DT value %d\n", val); + pdata->hpdet_channel = ARIZONA_ACCDET_MODE_HPL; + } + + device_property_read_u32(arizona->dev, "wlf,micd-detect-debounce", + &pdata->micd_detect_debounce); + + device_property_read_u32(arizona->dev, "wlf,micd-bias-start-time", + &pdata->micd_bias_start_time); + + device_property_read_u32(arizona->dev, "wlf,micd-rate", + &pdata->micd_rate); + + device_property_read_u32(arizona->dev, "wlf,micd-dbtime", + &pdata->micd_dbtime); + + device_property_read_u32(arizona->dev, "wlf,micd-timeout-ms", + &pdata->micd_timeout); + + pdata->micd_force_micbias = device_property_read_bool(arizona->dev, + "wlf,micd-force-micbias"); + + pdata->micd_software_compare = device_property_read_bool(arizona->dev, + "wlf,micd-software-compare"); + + pdata->jd_invert = device_property_read_bool(arizona->dev, + "wlf,jd-invert"); + + device_property_read_u32(arizona->dev, "wlf,gpsw", &pdata->gpsw); + + pdata->jd_gpio5 = device_property_read_bool(arizona->dev, + "wlf,use-jd2"); + pdata->jd_gpio5_nopull = device_property_read_bool(arizona->dev, + "wlf,use-jd2-nopull"); + + ret = arizona_extcon_get_micd_configs(dev, arizona); + if (ret < 0) + dev_err(arizona->dev, "Failed to read micd configs: %d\n", ret); + + return 0; +} + +static int arizona_extcon_probe(struct platform_device *pdev) +{ + struct arizona *arizona = dev_get_drvdata(pdev->dev.parent); + struct arizona_pdata *pdata = &arizona->pdata; + struct arizona_extcon_info *info; + unsigned int val; + unsigned int clamp_mode; + int jack_irq_fall, jack_irq_rise; + int ret, mode, i, j; + + if (!arizona->dapm || !arizona->dapm->card) + return -EPROBE_DEFER; + + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + if (!dev_get_platdata(arizona->dev)) + arizona_extcon_device_get_pdata(&pdev->dev, arizona); + + info->micvdd = devm_regulator_get(&pdev->dev, "MICVDD"); + if (IS_ERR(info->micvdd)) { + ret = PTR_ERR(info->micvdd); + dev_err(arizona->dev, "Failed to get MICVDD: %d\n", ret); + return ret; + } + + mutex_init(&info->lock); + info->arizona = arizona; + info->dev = &pdev->dev; + info->last_jackdet = ~(ARIZONA_MICD_CLAMP_STS | ARIZONA_JD1_STS); + INIT_DELAYED_WORK(&info->hpdet_work, arizona_hpdet_work); + INIT_DELAYED_WORK(&info->micd_detect_work, arizona_micd_detect); + INIT_DELAYED_WORK(&info->micd_timeout_work, arizona_micd_timeout_work); + platform_set_drvdata(pdev, info); + + switch (arizona->type) { + case WM5102: + switch (arizona->rev) { + case 0: + info->micd_reva = true; + break; + default: + info->micd_clamp = true; + info->hpdet_ip_version = 1; + break; + } + break; + case WM5110: + case WM8280: + switch (arizona->rev) { + case 0 ... 
2: + break; + default: + info->micd_clamp = true; + info->hpdet_ip_version = 2; + break; + } + break; + case WM8998: + case WM1814: + info->micd_clamp = true; + info->hpdet_ip_version = 2; + break; + default: + break; + } + + info->edev = devm_extcon_dev_allocate(&pdev->dev, arizona_cable); + if (IS_ERR(info->edev)) { + dev_err(&pdev->dev, "failed to allocate extcon device\n"); + return -ENOMEM; + } + + ret = devm_extcon_dev_register(&pdev->dev, info->edev); + if (ret < 0) { + dev_err(arizona->dev, "extcon_dev_register() failed: %d\n", + ret); + return ret; + } + + info->input = devm_input_allocate_device(&pdev->dev); + if (!info->input) { + dev_err(arizona->dev, "Can't allocate input dev\n"); + ret = -ENOMEM; + return ret; + } + + info->input->name = "Headset"; + info->input->phys = "arizona/extcon"; + + if (!pdata->micd_timeout) + pdata->micd_timeout = DEFAULT_MICD_TIMEOUT; + + if (pdata->num_micd_configs) { + info->micd_modes = pdata->micd_configs; + info->micd_num_modes = pdata->num_micd_configs; + } else { + info->micd_modes = micd_default_modes; + info->micd_num_modes = ARRAY_SIZE(micd_default_modes); + } + + if (arizona->pdata.gpsw > 0) + regmap_update_bits(arizona->regmap, ARIZONA_GP_SWITCH_1, + ARIZONA_SW1_MODE_MASK, arizona->pdata.gpsw); + + if (pdata->micd_pol_gpio > 0) { + if (info->micd_modes[0].gpio) + mode = GPIOF_OUT_INIT_HIGH; + else + mode = GPIOF_OUT_INIT_LOW; + + ret = devm_gpio_request_one(&pdev->dev, pdata->micd_pol_gpio, + mode, "MICD polarity"); + if (ret != 0) { + dev_err(arizona->dev, "Failed to request GPIO%d: %d\n", + pdata->micd_pol_gpio, ret); + return ret; + } + + info->micd_pol_gpio = gpio_to_desc(pdata->micd_pol_gpio); + } else { + if (info->micd_modes[0].gpio) + mode = GPIOD_OUT_HIGH; + else + mode = GPIOD_OUT_LOW; + + /* We can't use devm here because we need to do the get + * against the MFD device, as that is where the of_node + * will reside, but if we devm against that the GPIO + * will not be freed if the extcon driver is unloaded. 
+ */ + info->micd_pol_gpio = gpiod_get_optional(arizona->dev, + "wlf,micd-pol", + GPIOD_OUT_LOW); + if (IS_ERR(info->micd_pol_gpio)) { + ret = PTR_ERR(info->micd_pol_gpio); + dev_err(arizona->dev, + "Failed to get microphone polarity GPIO: %d\n", + ret); + return ret; + } + } + + if (arizona->pdata.hpdet_id_gpio > 0) { + ret = devm_gpio_request_one(&pdev->dev, + arizona->pdata.hpdet_id_gpio, + GPIOF_OUT_INIT_LOW, + "HPDET"); + if (ret != 0) { + dev_err(arizona->dev, "Failed to request GPIO%d: %d\n", + arizona->pdata.hpdet_id_gpio, ret); + goto err_gpio; + } + } + + if (arizona->pdata.micd_bias_start_time) + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_BIAS_STARTTIME_MASK, + arizona->pdata.micd_bias_start_time + << ARIZONA_MICD_BIAS_STARTTIME_SHIFT); + + if (arizona->pdata.micd_rate) + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_RATE_MASK, + arizona->pdata.micd_rate + << ARIZONA_MICD_RATE_SHIFT); + + switch (arizona->pdata.micd_dbtime) { + case MICD_DBTIME_FOUR_READINGS: + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_DBTIME_MASK, + ARIZONA_MICD_DBTIME); + break; + case MICD_DBTIME_TWO_READINGS: + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1, + ARIZONA_MICD_DBTIME_MASK, 0); + break; + default: + break; + } + + BUILD_BUG_ON(ARRAY_SIZE(arizona_micd_levels) < + ARIZONA_NUM_MICD_BUTTON_LEVELS); + + if (arizona->pdata.num_micd_ranges) { + info->micd_ranges = pdata->micd_ranges; + info->num_micd_ranges = pdata->num_micd_ranges; + } else { + info->micd_ranges = micd_default_ranges; + info->num_micd_ranges = ARRAY_SIZE(micd_default_ranges); + } + + if (arizona->pdata.num_micd_ranges > ARIZONA_MAX_MICD_RANGE) { + dev_err(arizona->dev, "Too many MICD ranges: %d\n", + arizona->pdata.num_micd_ranges); + } + + if (info->num_micd_ranges > 1) { + for (i = 1; i < info->num_micd_ranges; i++) { + if (info->micd_ranges[i - 1].max > + info->micd_ranges[i].max) { + dev_err(arizona->dev, + "MICD ranges must be sorted\n"); + ret = -EINVAL; + goto err_gpio; + } + } + } + + /* Disable all buttons by default */ + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_2, + ARIZONA_MICD_LVL_SEL_MASK, 0x81); + + /* Set up all the buttons the user specified */ + for (i = 0; i < info->num_micd_ranges; i++) { + for (j = 0; j < ARIZONA_NUM_MICD_BUTTON_LEVELS; j++) + if (arizona_micd_levels[j] >= info->micd_ranges[i].max) + break; + + if (j == ARIZONA_NUM_MICD_BUTTON_LEVELS) { + dev_err(arizona->dev, "Unsupported MICD level %d\n", + info->micd_ranges[i].max); + ret = -EINVAL; + goto err_gpio; + } + + dev_dbg(arizona->dev, "%d ohms for MICD threshold %d\n", + arizona_micd_levels[j], i); + + arizona_micd_set_level(arizona, i, j); + input_set_capability(info->input, EV_KEY, + info->micd_ranges[i].key); + + /* Enable reporting of that range */ + regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_2, + 1 << i, 1 << i); + } + + /* Set all the remaining keys to a maximum */ + for (; i < ARIZONA_MAX_MICD_RANGE; i++) + arizona_micd_set_level(arizona, i, 0x3f); + + /* + * If we have a clamp use it, activating in conjunction with + * GPIO5 if that is connected for jack detect operation. 
+ */ + if (info->micd_clamp) { + if (arizona->pdata.jd_gpio5) { + /* Put the GPIO into input mode with optional pull */ + val = 0xc101; + if (arizona->pdata.jd_gpio5_nopull) + val &= ~ARIZONA_GPN_PU; + + regmap_write(arizona->regmap, ARIZONA_GPIO5_CTRL, + val); + + if (arizona->pdata.jd_invert) + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDH_GP5H; + else + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDL_GP5H; + } else { + if (arizona->pdata.jd_invert) + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDH; + else + clamp_mode = ARIZONA_MICD_CLAMP_MODE_JDL; + } + + regmap_update_bits(arizona->regmap, + ARIZONA_MICD_CLAMP_CONTROL, + ARIZONA_MICD_CLAMP_MODE_MASK, clamp_mode); + + regmap_update_bits(arizona->regmap, + ARIZONA_JACK_DETECT_DEBOUNCE, + ARIZONA_MICD_CLAMP_DB, + ARIZONA_MICD_CLAMP_DB); + } + + arizona_extcon_set_mode(info, 0); + + pm_runtime_enable(&pdev->dev); + pm_runtime_idle(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + if (info->micd_clamp) { + jack_irq_rise = ARIZONA_IRQ_MICD_CLAMP_RISE; + jack_irq_fall = ARIZONA_IRQ_MICD_CLAMP_FALL; + } else { + jack_irq_rise = ARIZONA_IRQ_JD_RISE; + jack_irq_fall = ARIZONA_IRQ_JD_FALL; + } + + ret = arizona_request_irq(arizona, jack_irq_rise, + "JACKDET rise", arizona_jackdet, info); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to get JACKDET rise IRQ: %d\n", + ret); + goto err_pm; + } + + ret = arizona_set_irq_wake(arizona, jack_irq_rise, 1); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to set JD rise IRQ wake: %d\n", + ret); + goto err_rise; + } + + ret = arizona_request_irq(arizona, jack_irq_fall, + "JACKDET fall", arizona_jackdet, info); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to get JD fall IRQ: %d\n", ret); + goto err_rise_wake; + } + + ret = arizona_set_irq_wake(arizona, jack_irq_fall, 1); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to set JD fall IRQ wake: %d\n", + ret); + goto err_fall; + } + + ret = arizona_request_irq(arizona, ARIZONA_IRQ_MICDET, + "MICDET", arizona_micdet, info); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to get MICDET IRQ: %d\n", ret); + goto err_fall_wake; + } + + ret = arizona_request_irq(arizona, ARIZONA_IRQ_HPDET, + "HPDET", arizona_hpdet_irq, info); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to get HPDET IRQ: %d\n", ret); + goto err_micdet; + } + + arizona_clk32k_enable(arizona); + regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_DEBOUNCE, + ARIZONA_JD1_DB, ARIZONA_JD1_DB); + regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_ANALOGUE, + ARIZONA_JD1_ENA, ARIZONA_JD1_ENA); + + ret = regulator_allow_bypass(info->micvdd, true); + if (ret != 0) + dev_warn(arizona->dev, "Failed to set MICVDD to bypass: %d\n", + ret); + + ret = input_register_device(info->input); + if (ret) { + dev_err(&pdev->dev, "Can't register input device: %d\n", ret); + goto err_hpdet; + } + + pm_runtime_put(&pdev->dev); + + return 0; + +err_hpdet: + arizona_free_irq(arizona, ARIZONA_IRQ_HPDET, info); +err_micdet: + arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info); +err_fall_wake: + arizona_set_irq_wake(arizona, jack_irq_fall, 0); +err_fall: + arizona_free_irq(arizona, jack_irq_fall, info); +err_rise_wake: + arizona_set_irq_wake(arizona, jack_irq_rise, 0); +err_rise: + arizona_free_irq(arizona, jack_irq_rise, info); +err_pm: + pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); +err_gpio: + gpiod_put(info->micd_pol_gpio); + return ret; +} + +static int arizona_extcon_remove(struct platform_device *pdev) +{ + struct arizona_extcon_info *info = platform_get_drvdata(pdev); + struct arizona *arizona = 
info->arizona;
	int jack_irq_rise, jack_irq_fall;
	bool change;
	int ret;

	ret = regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1,
				       ARIZONA_MICD_ENA, 0,
				       &change);
	if (ret < 0) {
		dev_err(&pdev->dev, "Failed to disable micd on remove: %d\n",
			ret);
	} else if (change) {
		regulator_disable(info->micvdd);
		pm_runtime_put(info->dev);
	}

	gpiod_put(info->micd_pol_gpio);

	pm_runtime_disable(&pdev->dev);

	regmap_update_bits(arizona->regmap,
			   ARIZONA_MICD_CLAMP_CONTROL,
			   ARIZONA_MICD_CLAMP_MODE_MASK, 0);

	if (info->micd_clamp) {
		jack_irq_rise = ARIZONA_IRQ_MICD_CLAMP_RISE;
		jack_irq_fall = ARIZONA_IRQ_MICD_CLAMP_FALL;
	} else {
		jack_irq_rise = ARIZONA_IRQ_JD_RISE;
		jack_irq_fall = ARIZONA_IRQ_JD_FALL;
	}

	arizona_set_irq_wake(arizona, jack_irq_rise, 0);
	arizona_set_irq_wake(arizona, jack_irq_fall, 0);
	arizona_free_irq(arizona, ARIZONA_IRQ_HPDET, info);
	arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info);
	arizona_free_irq(arizona, jack_irq_rise, info);
	arizona_free_irq(arizona, jack_irq_fall, info);
	cancel_delayed_work_sync(&info->hpdet_work);
	regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_ANALOGUE,
			   ARIZONA_JD1_ENA, 0);
	arizona_clk32k_disable(arizona);

	return 0;
}

static struct platform_driver arizona_extcon_driver = {
	.driver		= {
		.name	= "arizona-extcon",
	},
	.probe		= arizona_extcon_probe,
	.remove		= arizona_extcon_remove,
};

module_platform_driver(arizona_extcon_driver);

MODULE_DESCRIPTION("Arizona Extcon driver");
MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:extcon-arizona");
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
index 0d139e4de37c7..8a347b9384064 100644
--- a/drivers/firmware/cirrus/cs_dsp.c
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -1107,9 +1107,16 @@ struct cs_dsp_coeff_parsed_coeff {
 	int len;
 };
 
-static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
+static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, unsigned int avail,
+				     const u8 **str)
 {
-	int length;
+	int length, total_field_len;
+
+	/* String fields are at least one __le32 */
+	if (sizeof(__le32) > avail) {
+		*pos = NULL;
+		return 0;
+	}
 
 	switch (bytes) {
 	case 1:
@@ -1122,10 +1129,16 @@ static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
 		return 0;
 	}
 
+	total_field_len = ((length + bytes) + 3) & ~0x03;
+	if ((unsigned int)total_field_len > avail) {
+		*pos = NULL;
+		return 0;
+	}
+
 	if (str)
 		*str = *pos + bytes;
 
-	*pos += ((length + bytes) + 3) & ~0x03;
+	*pos += total_field_len;
 
 	return length;
 }
@@ -1150,71 +1163,134 @@ static int cs_dsp_coeff_parse_int(int bytes, const u8 **pos)
 	return val;
 }
 
-static inline void cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, const u8 **data,
-					  struct cs_dsp_coeff_parsed_alg *blk)
+static int cs_dsp_coeff_parse_alg(struct cs_dsp *dsp,
+				  const struct wmfw_region *region,
+				  struct cs_dsp_coeff_parsed_alg *blk)
 {
 	const struct wmfw_adsp_alg_data *raw;
+	unsigned int data_len = le32_to_cpu(region->len);
+	unsigned int pos;
+	const u8 *tmp;
+
+	raw = (const struct wmfw_adsp_alg_data *)region->data;
 
 	switch (dsp->fw_ver) {
 	case 0:
 	case 1:
-		raw = (const struct wmfw_adsp_alg_data *)*data;
-		*data = raw->data;
+		if (sizeof(*raw) > data_len)
+			return -EOVERFLOW;
 
 		blk->id = le32_to_cpu(raw->id);
 		blk->name = raw->name;
-		blk->name_len = strlen(raw->name);
+		blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name));
 		blk->ncoeff = le32_to_cpu(raw->ncoeff);
+
+		pos = sizeof(*raw);
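+		/*
+		 * 'pos' now points just past the fixed-size v0/v1 algorithm
+		 * header, i.e. at the first coefficient descriptor; it is
+		 * returned so the caller can continue parsing from there.
+		 */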
break; default: - blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), data); - blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), data, + if (sizeof(raw->id) > data_len) + return -EOVERFLOW; + + tmp = region->data; + blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), &tmp); + pos = tmp - region->data; + + tmp = ®ion->data[pos]; + blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, &blk->name); - cs_dsp_coeff_parse_string(sizeof(u16), data, NULL); - blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), data); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + if (sizeof(raw->ncoeff) > (data_len - pos)) + return -EOVERFLOW; + + blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), &tmp); + pos += sizeof(raw->ncoeff); break; } + if ((int)blk->ncoeff < 0) + return -EOVERFLOW; + cs_dsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id); cs_dsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name); cs_dsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff); + + return pos; } -static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data, - struct cs_dsp_coeff_parsed_coeff *blk) +static int cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, + const struct wmfw_region *region, + unsigned int pos, + struct cs_dsp_coeff_parsed_coeff *blk) { const struct wmfw_adsp_coeff_data *raw; + unsigned int data_len = le32_to_cpu(region->len); + unsigned int blk_len, blk_end_pos; const u8 *tmp; - int length; + + raw = (const struct wmfw_adsp_coeff_data *)®ion->data[pos]; + if (sizeof(raw->hdr) > (data_len - pos)) + return -EOVERFLOW; + + blk_len = le32_to_cpu(raw->hdr.size); + if (blk_len > S32_MAX) + return -EOVERFLOW; + + if (blk_len > (data_len - pos - sizeof(raw->hdr))) + return -EOVERFLOW; + + blk_end_pos = pos + sizeof(raw->hdr) + blk_len; + + blk->offset = le16_to_cpu(raw->hdr.offset); + blk->mem_type = le16_to_cpu(raw->hdr.type); switch (dsp->fw_ver) { case 0: case 1: - raw = (const struct wmfw_adsp_coeff_data *)*data; - *data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size); + if (sizeof(*raw) > (data_len - pos)) + return -EOVERFLOW; - blk->offset = le16_to_cpu(raw->hdr.offset); - blk->mem_type = le16_to_cpu(raw->hdr.type); blk->name = raw->name; - blk->name_len = strlen(raw->name); + blk->name_len = strnlen(raw->name, ARRAY_SIZE(raw->name)); blk->ctl_type = le16_to_cpu(raw->ctl_type); blk->flags = le16_to_cpu(raw->flags); blk->len = le32_to_cpu(raw->len); break; default: - tmp = *data; - blk->offset = cs_dsp_coeff_parse_int(sizeof(raw->hdr.offset), &tmp); - blk->mem_type = cs_dsp_coeff_parse_int(sizeof(raw->hdr.type), &tmp); - length = cs_dsp_coeff_parse_int(sizeof(raw->hdr.size), &tmp); - blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, + pos += sizeof(raw->hdr); + tmp = ®ion->data[pos]; + blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, &blk->name); - cs_dsp_coeff_parse_string(sizeof(u8), &tmp, NULL); - cs_dsp_coeff_parse_string(sizeof(u16), &tmp, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u8), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + cs_dsp_coeff_parse_string(sizeof(u16), &tmp, data_len - pos, NULL); + if (!tmp) + return -EOVERFLOW; + + pos = tmp - region->data; + if (sizeof(raw->ctl_type) + sizeof(raw->flags) + sizeof(raw->len) > + (data_len - 
pos)) + return -EOVERFLOW; + blk->ctl_type = cs_dsp_coeff_parse_int(sizeof(raw->ctl_type), &tmp); + pos += sizeof(raw->ctl_type); blk->flags = cs_dsp_coeff_parse_int(sizeof(raw->flags), &tmp); + pos += sizeof(raw->flags); blk->len = cs_dsp_coeff_parse_int(sizeof(raw->len), &tmp); - - *data = *data + sizeof(raw->hdr) + length; break; } @@ -1224,6 +1300,8 @@ static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data, cs_dsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags); cs_dsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type); cs_dsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len); + + return blk_end_pos; } static int cs_dsp_check_coeff_flags(struct cs_dsp *dsp, @@ -1247,12 +1325,16 @@ static int cs_dsp_parse_coeff(struct cs_dsp *dsp, struct cs_dsp_alg_region alg_region = {}; struct cs_dsp_coeff_parsed_alg alg_blk; struct cs_dsp_coeff_parsed_coeff coeff_blk; - const u8 *data = region->data; - int i, ret; + int i, pos, ret; + + pos = cs_dsp_coeff_parse_alg(dsp, region, &alg_blk); + if (pos < 0) + return pos; - cs_dsp_coeff_parse_alg(dsp, &data, &alg_blk); for (i = 0; i < alg_blk.ncoeff; i++) { - cs_dsp_coeff_parse_coeff(dsp, &data, &coeff_blk); + pos = cs_dsp_coeff_parse_coeff(dsp, region, pos, &coeff_blk); + if (pos < 0) + return pos; switch (coeff_blk.ctl_type) { case WMFW_CTL_TYPE_BYTES: @@ -1321,6 +1403,10 @@ static unsigned int cs_dsp_adsp1_parse_sizes(struct cs_dsp *dsp, const struct wmfw_adsp1_sizes *adsp1_sizes; adsp1_sizes = (void *)&firmware->data[pos]; + if (sizeof(*adsp1_sizes) > firmware->size - pos) { + cs_dsp_err(dsp, "%s: file truncated\n", file); + return 0; + } cs_dsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file, le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm), @@ -1337,6 +1423,10 @@ static unsigned int cs_dsp_adsp2_parse_sizes(struct cs_dsp *dsp, const struct wmfw_adsp2_sizes *adsp2_sizes; adsp2_sizes = (void *)&firmware->data[pos]; + if (sizeof(*adsp2_sizes) > firmware->size - pos) { + cs_dsp_err(dsp, "%s: file truncated\n", file); + return 0; + } cs_dsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file, le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym), @@ -1376,7 +1466,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, struct regmap *regmap = dsp->regmap; unsigned int pos = 0; const struct wmfw_header *header; - const struct wmfw_adsp1_sizes *adsp1_sizes; const struct wmfw_footer *footer; const struct wmfw_region *region; const struct cs_dsp_region *mem; @@ -1392,10 +1481,8 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, ret = -EINVAL; - pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer); - if (pos >= firmware->size) { - cs_dsp_err(dsp, "%s: file too short, %zu bytes\n", - file, firmware->size); + if (sizeof(*header) >= firmware->size) { + ret = -EOVERFLOW; goto out_fw; } @@ -1423,22 +1510,36 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, pos = sizeof(*header); pos = dsp->ops->parse_sizes(dsp, file, pos, firmware); + if ((pos == 0) || (sizeof(*footer) > firmware->size - pos)) { + ret = -EOVERFLOW; + goto out_fw; + } footer = (void *)&firmware->data[pos]; pos += sizeof(*footer); if (le32_to_cpu(header->len) != pos) { - cs_dsp_err(dsp, "%s: unexpected header length %d\n", - file, le32_to_cpu(header->len)); + ret = -EOVERFLOW; goto out_fw; } cs_dsp_dbg(dsp, "%s: timestamp %llu\n", file, le64_to_cpu(footer->timestamp)); - while (pos < firmware->size && - sizeof(*region) < firmware->size - pos) { + while (pos < firmware->size) 
{ + /* Is there enough data for a complete block header? */ + if (sizeof(*region) > firmware->size - pos) { + ret = -EOVERFLOW; + goto out_fw; + } + region = (void *)&(firmware->data[pos]); + + if (le32_to_cpu(region->len) > firmware->size - pos - sizeof(*region)) { + ret = -EOVERFLOW; + goto out_fw; + } + region_name = "Unknown"; reg = 0; text = NULL; @@ -1495,16 +1596,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, regions, le32_to_cpu(region->len), offset, region_name); - if (le32_to_cpu(region->len) > - firmware->size - pos - sizeof(*region)) { - cs_dsp_err(dsp, - "%s.%d: %s region len %d bytes exceeds file length %zu\n", - file, regions, region_name, - le32_to_cpu(region->len), firmware->size); - ret = -EINVAL; - goto out_fw; - } - if (text) { memcpy(text, region->data, le32_to_cpu(region->len)); cs_dsp_info(dsp, "%s: %s\n", file, text); @@ -1555,6 +1646,9 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, cs_dsp_buf_free(&buf_list); kfree(text); + if (ret == -EOVERFLOW) + cs_dsp_err(dsp, "%s: file content overflows file data\n", file); + return ret; } @@ -2122,10 +2216,20 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware pos = le32_to_cpu(hdr->len); blocks = 0; - while (pos < firmware->size && - sizeof(*blk) < firmware->size - pos) { + while (pos < firmware->size) { + /* Is there enough data for a complete block header? */ + if (sizeof(*blk) > firmware->size - pos) { + ret = -EOVERFLOW; + goto out_fw; + } + blk = (void *)(&firmware->data[pos]); + if (le32_to_cpu(blk->len) > firmware->size - pos - sizeof(*blk)) { + ret = -EOVERFLOW; + goto out_fw; + } + type = le16_to_cpu(blk->type); offset = le16_to_cpu(blk->offset); version = le32_to_cpu(blk->ver) >> 8; @@ -2222,17 +2326,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware } if (reg) { - if (le32_to_cpu(blk->len) > - firmware->size - pos - sizeof(*blk)) { - cs_dsp_err(dsp, - "%s.%d: %s region len %d bytes exceeds file length %zu\n", - file, blocks, region_name, - le32_to_cpu(blk->len), - firmware->size); - ret = -EINVAL; - goto out_fw; - } - buf = cs_dsp_buf_alloc(blk->data, le32_to_cpu(blk->len), &buf_list); @@ -2272,6 +2365,10 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); kfree(text); + + if (ret == -EOVERFLOW) + cs_dsp_err(dsp, "%s: file content overflows file data\n", file); + return ret; } diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 880ffcb500887..921f61507ae83 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -101,8 +101,10 @@ static __init struct device *sysfb_parent_dev(const struct screen_info *si) if (IS_ERR(pdev)) { return ERR_CAST(pdev); } else if (pdev) { - if (!sysfb_pci_dev_is_enabled(pdev)) + if (!sysfb_pci_dev_is_enabled(pdev)) { + pci_dev_put(pdev); return ERR_PTR(-ENODEV); + } return &pdev->dev; } @@ -137,7 +139,7 @@ static __init int sysfb_init(void) if (compatible) { pd = sysfb_create_simplefb(si, &mode, parent); if (!IS_ERR(pd)) - goto unlock_mutex; + goto put_device; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -155,7 +157,7 @@ static __init int sysfb_init(void) pd = platform_device_alloc(name, 0); if (!pd) { ret = -ENOMEM; - goto unlock_mutex; + goto put_device; } pd->dev.parent = parent; @@ -170,9 +172,11 @@ static __init int sysfb_init(void) if (ret) goto err; - goto unlock_mutex; + goto put_device; err: 
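+	/* Error path: free the platform device, then fall through and drop the parent device reference. */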
platform_device_put(pd); +put_device: + put_device(parent); unlock_mutex: mutex_unlock(&disable_lock); return ret; diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index 71e1af7c21847..d89e78f0ead31 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -619,8 +619,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, ret = gpiochip_get_ngpios(gc, dev); if (ret) gc->ngpio = gc->bgpio_bits; - else - gc->bgpio_bits = roundup_pow_of_two(round_up(gc->ngpio, 8)); ret = bgpio_setup_io(gc, dat, set, clr, flags); if (ret) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index d75f6ee370282..89d5e64cf68bf 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -202,6 +202,24 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, * helper, and be consistent with what other drivers do. */ { "qi,lb60", "rb-gpios", true }, +#endif +#if IS_ENABLED(CONFIG_PCI_LANTIQ) + /* + * According to the PCI specification, the RST# pin is an + * active-low signal. However, most of the device trees that + * have been widely used for a long time incorrectly describe + * the reset GPIO as active-high, and also used the wrong name + * for the property. + */ + { "lantiq,pci-xway", "gpio-reset", false }, +#endif +#if IS_ENABLED(CONFIG_TOUCHSCREEN_TSC2005) + /* + * The DTS for the Nokia N900 incorrectly specified "active high" + * polarity for the reset line, while the chip actually + * treats it as "active low". + */ + { "ti,tsc2005", "reset-gpios", false }, #endif }; unsigned int i; @@ -504,9 +522,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, { "reset", "reset-n-io", "marvell,nfc-uart" }, { "reset", "reset-n-io", "mrvl,nfc-uart" }, #endif -#if !IS_ENABLED(CONFIG_PCI_LANTIQ) +#if IS_ENABLED(CONFIG_PCI_LANTIQ) /* MIPS Lantiq PCI */ - { "reset", "gpios-reset", "lantiq,pci-xway" }, + { "reset", "gpio-reset", "lantiq,pci-xway" }, #endif /* diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 0051fb1b437fb..35c20e373b01b 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -55,11 +55,11 @@ config DRM_AMDGPU_CIK Choose this option if you want to enable support for CIK (Sea Islands) asics. - CIK is already supported in radeon. Support for CIK in amdgpu - will be disabled by default and is still provided by radeon. - Use module options to override this: + CIK is already supported in radeon. If you enable this option, + support for CIK will be provided by amdgpu and disabled in + radeon by default. Use module options to override this: - radeon.cik_support=0 amdgpu.cik_support=1 + radeon.cik_support=1 amdgpu.cik_support=0 config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 38408e4e158e5..45f3661afa023 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -23,9 +23,9 @@ # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. -FULL_AMD_PATH=$(src)/.. 
-DISPLAY_FOLDER_NAME=display -FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) +FULL_AMD_PATH := $(patsubst %/amdgpu,%,$(src)) +DISPLAY_FOLDER_NAME := display +FULL_AMD_DISPLAY_PATH := $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ @@ -39,23 +39,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ -I$(FULL_AMD_PATH)/amdkfd -subdir-ccflags-y := -Wextra -subdir-ccflags-y += -Wunused -subdir-ccflags-y += -Wmissing-prototypes -subdir-ccflags-y += -Wmissing-declarations -subdir-ccflags-y += -Wmissing-include-dirs -subdir-ccflags-y += -Wold-style-definition -subdir-ccflags-y += -Wmissing-format-attribute -# Need this to avoid recursive variable evaluation issues -cond-flags := $(call cc-option, -Wunused-but-set-variable) \ - $(call cc-option, -Wunused-const-variable) \ - $(call cc-option, -Wstringop-truncation) \ - $(call cc-option, -Wpacked-not-aligned) -subdir-ccflags-y += $(cond-flags) -subdir-ccflags-y += -Wno-unused-parameter -subdir-ccflags-y += -Wno-type-limits -subdir-ccflags-y += -Wno-sign-compare -subdir-ccflags-y += -Wno-missing-field-initializers +# Locally disable W=1 warnings enabled in drm subsystem Makefile subdir-ccflags-y += -Wno-override-init subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror @@ -76,7 +60,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o amdgpu_hdp.o \ - amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o amdgpu_sem.o amdgpu_gmc.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ @@ -282,7 +266,8 @@ amdgpu-y += \ amdgpu_amdkfd_gfx_v10.o \ amdgpu_amdkfd_gfx_v10_3.o \ amdgpu_amdkfd_gfx_v11.o \ - amdgpu_amdkfd_gfx_v12.o + amdgpu_amdkfd_gfx_v12.o \ + amdgpu_amdkfd_rlc_spm.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) amdgpu-y += amdgpu_amdkfd_gfx_v7.o @@ -309,7 +294,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o +amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_hmm.o include $(FULL_AMD_PATH)/pm/Makefile @@ -317,7 +302,7 @@ amdgpu-y += $(AMD_POWERPLAY_FILES) ifneq ($(CONFIG_DRM_AMD_DC),) -RELATIVE_AMD_DISPLAY_PATH = ../$(DISPLAY_FOLDER_NAME) +RELATIVE_AMD_DISPLAY_PATH := ../$(DISPLAY_FOLDER_NAME) include $(FULL_AMD_DISPLAY_PATH)/Makefile amdgpu-y += $(AMD_DISPLAY_FILES) @@ -332,4 +317,8 @@ amdgpu-y += \ isp_v4_1_1.o endif +include $(FULL_AMD_PATH)/backport/Makefile + obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o + +CFLAGS_amdgpu_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index b0f95a7649bfe..15bb26b76ed48 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -85,7 +85,7 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_SDMA)) continue; - r = adev->ip_blocks[i].version->funcs->suspend(adev); + r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, @@ -246,7 +246,7 @@ static int aldebaran_mode2_restore_ip(struct 
amdgpu_device *adev) dev_err(adev->dev, "Failed to get BIF handle\n"); return -EINVAL; } - r = cmn_block->version->funcs->resume(adev); + r = cmn_block->version->funcs->resume(cmn_block); if (r) return r; @@ -282,7 +282,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) continue; - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "resume of IP block <%s> failed %d\n", @@ -304,7 +304,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init( - (void *)adev); + &adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "late_init of IP block <%s> failed %d after reset\n", @@ -344,6 +344,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, list_for_each_entry(tmp_adev, reset_device_list, reset_list) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + /*TBD: Ideally should clear only GFX, SDMA blocks*/ + amdgpu_ras_clear_err_state(tmp_adev); r = aldebaran_mode2_restore_ip(tmp_adev); if (r) goto end; @@ -417,6 +419,7 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = { static struct amdgpu_reset_handler *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = { &aldebaran_mode2_handler, + &xgmi_reset_on_init_handler, }; int aldebaran_reset_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f3980b40f2cef..22c7e9669162e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,6 +73,7 @@ #include "amdgpu_sync.h" #include "amdgpu_ring.h" #include "amdgpu_vm.h" +#include "amdgpu_sem.h" #include "amdgpu_dpm.h" #include "amdgpu_acp.h" #include "amdgpu_uvd.h" @@ -118,6 +119,8 @@ #define MAX_GPU_INSTANCE 64 +#define GFX_SLICE_PERIOD_MS 250 + struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; @@ -129,10 +132,6 @@ struct amdgpu_mgpu_info { uint32_t num_gpu; uint32_t num_dgpu; uint32_t num_apu; - - /* delayed reset_func for XGMI configuration if necessary */ - struct delayed_work delayed_reset_work; - bool pending_reset; }; enum amdgpu_ss { @@ -189,6 +188,8 @@ extern int amdgpu_exp_hw_support; extern int amdgpu_dc; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; +extern int amdgpu_no_evict; +extern int amdgpu_direct_gma_size; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; extern u64 amdgpu_cg_mask; @@ -228,6 +229,7 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; +extern bool pcie_p2p; extern int amdgpu_mtype_local; extern bool enforce_isolation; #ifdef CONFIG_HSA_AMD @@ -235,15 +237,13 @@ extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; extern int halt_if_hws_hang; +extern uint amdgpu_svm_default_granularity; #else static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; static const int __maybe_unused halt_if_hws_hang; #endif -#ifdef CONFIG_HSA_AMD_P2P -extern bool pcie_p2p; -#endif extern int amdgpu_tmz; extern int amdgpu_reset_method; @@ -362,7 +362,7 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, u64 *flags); 
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); -bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, +bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, enum amd_ip_block_type block_type); #define AMDGPU_MAX_IP_NUM 16 @@ -386,6 +386,7 @@ struct amdgpu_ip_block_version { struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; + struct amdgpu_device *adev; }; int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, @@ -425,6 +426,10 @@ struct amdgpu_clock { uint32_t max_pixel_clock; }; +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) +extern const struct dma_buf_ops amdgpu_dmabuf_ops; +#endif + /* sub-allocation manager, it has to be protected by another lock. * By conception this is an helper for other part of the driver * like the indirect buffer or semaphore, which both have their @@ -491,6 +496,8 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + spinlock_t sem_handles_lock; + struct idr sem_handles; /** GPU partition selection */ uint32_t xcp_id; }; @@ -560,6 +567,7 @@ enum amd_reset_method { AMD_RESET_METHOD_MODE2, AMD_RESET_METHOD_BACO, AMD_RESET_METHOD_PCI, + AMD_RESET_METHOD_ON_INIT, }; struct amdgpu_video_codec_info { @@ -650,6 +658,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_gem_dgma_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + /* VRAM scratch page for HDP bug, default vram page */ struct amdgpu_mem_scratch { struct amdgpu_bo *robj; @@ -740,6 +751,14 @@ enum amd_hw_ip_block_type { #define IP_VERSION_SUBREV(ver) ((ver) & 0xF) #define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8) +struct amdgpu_direct_gma { + /* reserved in visible vram */ + struct amdgpu_bo *dgma_bo; + atomic64_t vram_usage; + /* reserved in gart */ + atomic64_t gart_usage; +}; + struct amdgpu_ip_map_info { /* Map of logical to actual dev instances/mask */ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE]; @@ -818,6 +837,24 @@ struct amdgpu_mqd { struct amdgpu_mqd_prop *p); }; +/* + * Custom init levels can be defined for situations where a full + * initialization of all hardware blocks is not expected. Sample cases are + * custom init sequences after resume from S0i3/S3, reset on initialization, + * partial reset of blocks, etc. Presently, this defines only two levels. Levels + * are described in the corresponding struct definitions - amdgpu_init_default, + * amdgpu_init_minimal_xgmi. 
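+ * + * For example, a reset performed as part of device init can switch to + * AMDGPU_INIT_LEVEL_MINIMAL_XGMI, limiting hardware init to the IP blocks + * selected by hwini_ip_block_mask rather than running the full default + * sequence.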
+ */ +enum amdgpu_init_lvl_id { + AMDGPU_INIT_LEVEL_DEFAULT, + AMDGPU_INIT_LEVEL_MINIMAL_XGMI, +}; + +struct amdgpu_init_level { + enum amdgpu_init_lvl_id level; + uint32_t hwini_ip_block_mask; +}; + #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 struct amdgpu_reset_domain; @@ -868,6 +905,9 @@ struct amdgpu_device { uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; + /* Direct GMA */ + struct amdgpu_direct_gma direct_gma; + /* Register/doorbell mmio */ resource_size_t rmmio_base; resource_size_t rmmio_size; @@ -965,6 +1005,7 @@ struct amdgpu_device { bool ib_pool_ready; struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; + struct workqueue_struct *timeout_wq; /* interrupts */ struct amdgpu_irq irq; @@ -1080,10 +1121,6 @@ struct amdgpu_device { struct amdgpu_virt virt; - /* link all shadow bo */ - struct list_head shadow_list; - struct mutex shadow_list_lock; - /* record hw reset is performed */ bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; @@ -1099,7 +1136,9 @@ struct amdgpu_device { enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED struct mutex notifier_lock; +#endif int asic_reset_res; struct work_struct xgmi_reset_work; @@ -1162,6 +1201,13 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + bool debug_exp_resets; + + bool enforce_isolation[MAX_XCP]; + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + struct mutex enforce_isolation_mutex; + + struct amdgpu_init_level *init_lvl; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1257,6 +1303,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context); +int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context); + int emu_soc_asic_init(struct amdgpu_device *adev); /* @@ -1408,7 +1456,14 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); +#ifdef AMDKCL_ENABLE_RESIZE_FB_BAR int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); +#else +static inline int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) +{ + return 0; +} +#endif void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); @@ -1446,23 +1501,15 @@ void amdgpu_register_atpx_handler(void); void amdgpu_unregister_atpx_handler(void); bool amdgpu_has_atpx_dgpu_power_cntl(void); bool amdgpu_is_atpx_hybrid(void); -bool amdgpu_atpx_dgpu_req_power_for_displays(void); bool amdgpu_has_atpx(void); #else static inline void amdgpu_register_atpx_handler(void) {} static inline void amdgpu_unregister_atpx_handler(void) {} static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } static inline bool amdgpu_is_atpx_hybrid(void) { return false; } -static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; } static inline bool amdgpu_has_atpx(void) { return false; } #endif -#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI) -void *amdgpu_atpx_get_dhandle(void); -#else -static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; } -#endif - /* * KMS */ @@ -1484,6 +1531,7 @@ int amdgpu_device_resume(struct 
drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); int amdgpu_enable_vblank_kms(struct drm_crtc *crtc); void amdgpu_disable_vblank_kms(struct drm_crtc *crtc); + int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -1507,12 +1555,13 @@ struct amdgpu_afmt_acr { struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock); /* amdgpu_acpi.c */ - +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV struct amdgpu_numa_info { uint64_t size; int pxm; int nid; }; +#endif /* ATCS Device/Driver State */ #define AMDGPU_ATCS_PSC_DEV_STATE_D0 0 @@ -1531,17 +1580,22 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size); int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, struct amdgpu_numa_info *numa_info); +#endif void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV void amdgpu_acpi_release(void); +#endif #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size) { @@ -1553,10 +1607,13 @@ static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, { return -EINVAL; } +#endif static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV static inline void amdgpu_acpi_release(void) { } +#endif static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } @@ -1575,13 +1632,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif -#if defined(CONFIG_DRM_AMD_DC) -int amdgpu_dm_display_resume(struct amdgpu_device *adev ); -#else -static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } -#endif - - void amdgpu_register_gpu_instance(struct amdgpu_device *adev); void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); @@ -1623,4 +1673,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; extern const struct attribute_group amdgpu_flash_attr_group; +void amdgpu_set_init_level(struct amdgpu_device *adev, + enum amdgpu_init_lvl_id lvl); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 929095a2e0886..7b64c5b67e10b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -508,7 +511,7 @@ 
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h return -EINVAL; } - /* udpate aca bank to aca source error_cache first */ + /* update aca bank to aca source error_cache first */ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); if (ret) return ret; @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } @@ -889,7 +899,11 @@ static const struct file_operations aca_ue_dump_debug_fops = { .release = single_release, }; +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); +#else +DEFINE_SIMPLE_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); +#endif #endif void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index bf6c4a0d05252..b3a6470175baf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -87,9 +87,12 @@ #define ACP_TIMEOUT_LOOP 0x000000FF #define ACP_DEVS 4 #define ACP_SRC_ID 162 - static unsigned long acp_machine_id; +#ifndef DW_I2S_QUIRK_16BIT_IDX_OVERRIDE +#define DW_I2S_QUIRK_16BIT_IDX_OVERRIDE (1 << 2) +#endif + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -98,9 +101,9 @@ enum { ACP_TILE_DSP2, }; -static int acp_sw_init(void *handle) +static int acp_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->acp.parent = adev->dev; @@ -112,9 +115,9 @@ static int acp_sw_init(void *handle) return 0; } -static int acp_sw_fini(void *handle) +static int acp_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->acp.cgs_device) amdgpu_cgs_destroy_device(adev->acp.cgs_device); @@ -222,7 +225,7 @@ static const struct dmi_system_id acp_quirk_table[] = { * @handle: handle used to pass amdgpu_device pointer * */ -static int acp_hw_init(void *handle) +static int acp_hw_init(struct amdgpu_ip_block *ip_block) { int r; u64 acp_base; @@ -230,13 +233,7 @@ static int acp_hw_init(void *handle) u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - const struct amdgpu_ip_block *ip_block = - amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP); - - if (!ip_block) - return -EINVAL; + struct amdgpu_device *adev = ip_block->adev; r = amd_acp_hw_init(adev->acp.cgs_device, ip_block->version->major, ip_block->version->minor); @@ -506,11 +503,11 @@ static int acp_hw_init(void *handle) * @handle: handle used to pass amdgpu_device pointer * */ -static int acp_hw_fini(void *handle) +static int acp_hw_fini(struct amdgpu_ip_block *ip_block) { u32 val = 0; u32 count = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* return 
early if no ACP */ if (!adev->acp.acp_genpd) { @@ -565,9 +562,9 @@ static int acp_hw_fini(void *handle) return 0; } -static int acp_suspend(void *handle) +static int acp_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* power up on suspend */ if (!adev->acp.acp_cell) @@ -575,9 +572,9 @@ static int acp_suspend(void *handle) return 0; } -static int acp_resume(void *handle) +static int acp_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* power down again on resume */ if (!adev->acp.acp_cell) @@ -585,22 +582,17 @@ static int acp_resume(void *handle) return 0; } -static int acp_early_init(void *handle) -{ - return 0; -} - static bool acp_is_idle(void *handle) { return true; } -static int acp_wait_for_idle(void *handle) +static int acp_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int acp_soft_reset(void *handle) +static int acp_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } @@ -624,7 +616,7 @@ static int acp_set_powergating_state(void *handle, static const struct amd_ip_funcs acp_ip_funcs = { .name = "acp_ip", - .early_init = acp_early_init, + .early_init = NULL, .late_init = NULL, .sw_init = acp_sw_init, .sw_fini = acp_sw_fini, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index f85ace0384d21..92634e1675267 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -39,6 +39,7 @@ #include "amd_acpi.h" #include "atom.h" +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV /* Declare GUID for AMD _DSM method for XCCs */ static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2, 0xb8, 0xb4, 0x45, 0x56, 0x2e, @@ -77,6 +78,7 @@ struct amdgpu_acpi_dev_info { }; struct list_head amdgpu_acpi_dev_list; +#endif struct amdgpu_atif_notification_cfg { bool enabled; @@ -843,6 +845,7 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta return r; } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV #ifdef CONFIG_ACPI_NUMA static inline uint64_t amdgpu_acpi_get_numa_size(int nid) { @@ -1183,6 +1186,7 @@ int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, return -ENOENT; } +#endif /* HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV */ /** * amdgpu_acpi_event - handle notify events @@ -1378,7 +1382,11 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) return false; #if IS_ENABLED(CONFIG_SUSPEND) +#ifdef HAVE_PM_SUSPEND_TARGET_STATE return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; +#else + return false; +#endif #else return true; #endif @@ -1437,9 +1445,12 @@ void amdgpu_acpi_detect(void) atif->backlight_caps.caps_valid = false; } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV amdgpu_acpi_enumerate_xcc(); +#endif } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV void amdgpu_acpi_release(void) { struct amdgpu_acpi_dev_info *dev_info, *dev_tmp; @@ -1467,6 +1478,7 @@ void amdgpu_acpi_release(void) kfree(dev_info); } } +#endif #if IS_ENABLED(CONFIG_SUSPEND) /** @@ -1478,8 +1490,12 @@ void amdgpu_acpi_release(void) */ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { +#ifdef HAVE_PM_SUSPEND_TARGET_STATE return !(adev->flags & AMD_IS_APU) || (pm_suspend_target_state == PM_SUSPEND_MEM); +#else + return true; +#endif } /** @@ -1491,9 +1507,13 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) */ bool 
amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { +#ifdef HAVE_PM_SUSPEND_TARGET_STATE if (!(adev->flags & AMD_IS_APU) || (pm_suspend_target_state != PM_SUSPEND_TO_IDLE)) return false; +#else + return false; +#endif if (adev->asic_type < CHIP_RAVEN) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c272461d70a9a..f0ab00c2e1342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -42,6 +42,7 @@ */ uint64_t amdgpu_amdkfd_total_mem_size; +extern bool pcie_p2p; static bool kfd_initialized; int amdgpu_amdkfd_init(void) @@ -251,7 +252,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) { if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, run_pm); + kgd2kfd_suspend(adev->kfd.dev, run_pm, true); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) @@ -294,7 +295,7 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool cp_mqd_gfx9) + void **cpu_ptr, bool cp_mqd_gfx9, bool is_uswc_mode) { struct amdgpu_bo *bo = NULL; struct amdgpu_bo_param bp; @@ -305,7 +306,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, bp.size = size; bp.byte_align = PAGE_SIZE; bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + if (is_uswc_mode) + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + else + bp.flags = 0; bp.type = ttm_bo_type_kernel; bp.resv = NULL; bp.bo_ptr_size = sizeof(struct amdgpu_bo); @@ -515,9 +519,11 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, if (IS_ERR(dma_buf)) return PTR_ERR(dma_buf); +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) if (dma_buf->ops != &amdgpu_dmabuf_ops) /* Can't handle non-graphics buffers */ goto out_put; +#endif obj = dma_buf->priv; if (obj->dev->driver != adev_to_drm(adev)->driver) @@ -555,6 +561,12 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, return r; } +uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev) +{ + + return ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); +} + uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, struct amdgpu_device *src) { @@ -783,22 +795,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type) -{ - if (!hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } else { - if (adev->mmhub.funcs->query_utcl2_poison_status) - return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } -} - int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { return kgd2kfd_check_and_lock_kfd(); @@ -887,3 +883,45 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, return r; } + +/* Stop scheduling on KFD */ +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched(adev->kfd.dev, node_id); +} + +/* Start scheduling on KFD */ +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + 
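+	/* KFD finished init on this device; forward the request to the KFD scheduler. */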
return kgd2kfd_start_sched(adev->kfd.dev, node_id); +} + +/* check if there are KFD queues active */ +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return false; + + return kgd2kfd_compute_active(adev->kfd.dev, node_id); +} + +/* Config CGTT_SQ_CLK_CTRL */ +int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, + bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable) +{ + int r; + + if (!adev->kfd.init_complete) + return 0; + + r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable, + reg_override_enable, perfmon_override_enable); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4ed49265c764f..d62bf78c4883a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -34,6 +34,7 @@ #include #include #include +#include "amdgpu_gfx.h" #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -69,8 +70,13 @@ struct kfd_mem_attachment { struct kgd_mem { struct mutex lock; struct amdgpu_bo *bo; + struct kfd_ipc_obj *ipc_obj; struct dma_buf *dmabuf; +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED struct hmm_range *range; +#else + struct page **user_pages; +#endif struct list_head attachments; /* protected by amdkfd_process_info.lock */ struct list_head validate_list; @@ -103,6 +109,7 @@ struct amdgpu_kfd_dev { struct kfd_dev *dev; int64_t vram_used[MAX_XCP]; uint64_t vram_used_aligned[MAX_XCP]; + atomic64_t vram_pinned; bool init_complete; struct work_struct reset_work; @@ -193,8 +200,12 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); +#else +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +#endif int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, uint32_t domain, struct dma_fence *fence); @@ -218,8 +229,12 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) } static inline +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem) +#else +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +#endif { return 0; } @@ -234,7 +249,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9); + void **cpu_ptr, bool mqd_gfx9, bool is_uswc_mode); void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); @@ -254,6 +269,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags, int8_t *xcp_id); +uint64_t amdgpu_amdkfd_get_vram_usage(struct amdgpu_device *adev); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, struct amdgpu_device *src); int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct 
amdgpu_device *dst, @@ -264,6 +280,12 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, u32 inst); +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, + bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id); + /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -325,15 +347,64 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence __rcu **ef); + struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); + +struct amdgpu_bo *amdgpu_amdkfd_gpuvm_get_bo_ref(struct kgd_mem *mem, + uint32_t *flags); +void amdgpu_amdkfd_gpuvm_put_bo_ref(struct amdgpu_bo *bo); + +/** + * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using the following criteria + * @bo: Handle of buffer object being pinned + * @domain: Domain into which BO should be pinned + * + * - USERPTR BOs are UNPINNABLE and will return an error + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count incremented. It is valid to PIN a BO multiple times + * + * Return: ZERO if successful in pinning, Non-Zero in case of error. + */ +int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain); + +/** + * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins a BO using the following criteria + * @bo: Handle of buffer object being unpinned + * + * - Is an illegal request for USERPTR BOs and is ignored + * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their + * PIN count decremented. 
Calls to UNPIN must balance calls to PIN + */ +void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo); + +int amdgpu_amdkfd_gpuvm_get_sg_table(struct amdgpu_device *adev, + struct amdgpu_bo *bo, uint32_t flags, + uint64_t offset, uint64_t size, + struct device *dma_dev, enum dma_data_direction dir, + struct sg_table **ret_sg); +void amdgpu_amdkfd_gpuvm_put_sg_table(struct amdgpu_bo *bo, + struct device *dma_dev, enum dma_data_direction dir, + struct sg_table *sg); + +int amdgpu_amdkfd_gpuvm_import_ipcobj(struct amdgpu_device *adev, + struct dma_buf *dmabuf, + struct kfd_ipc_obj *ipc_obj, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, struct dma_buf **dmabuf); +int amdgpu_amdkfd_gpuvm_export_ipc_obj(struct amdgpu_device *adev, void *vm, + struct kgd_mem *mem, + struct kfd_ipc_obj **ipc_obj, + uint32_t flags, + uint32_t *restore_handle); + void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); @@ -348,8 +419,6 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, @@ -394,6 +463,13 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) } #endif +void amdgpu_amdkfd_rlc_spm_cntl(struct amdgpu_device *adev, bool cntl); +int amdgpu_amdkfd_rlc_spm_acquire(struct amdgpu_device *adev, + struct amdgpu_vm *vm, u64 gpu_addr, u32 size); +void amdgpu_amdkfd_rlc_spm_release(struct amdgpu_device *adev, struct amdgpu_vm *vm); +void amdgpu_amdkfd_rlc_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr); +void amdgpu_amdkfd_rlc_spm_interrupt(struct amdgpu_device *adev); + #if IS_ENABLED(CONFIG_HSA_AMD_SVM) int kgd2kfd_init_zone_device(struct amdgpu_device *adev); #else @@ -405,6 +481,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) #endif /* KGD2KFD callbacks */ +void kgd2kfd_spm_interrupt(struct kfd_dev *kfd); int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, @@ -416,7 +493,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); +void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm, bool force); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd, struct amdgpu_reset_context *reset_context); @@ -426,6 +503,9 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int 
kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -453,7 +533,7 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) { } -static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm, bool force) { } @@ -496,5 +576,20 @@ static inline int kgd2kfd_check_and_lock_kfd(void) static inline void kgd2kfd_unlock_kfd(void) { } + +static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + return false; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 8dfdb18197c49..d3fb05521580e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -26,6 +26,7 @@ #include "amdgpu_amdkfd_aldebaran.h" #include "gc/gc_9_4_2_offset.h" #include "gc/gc_9_4_2_sh_mask.h" +#include "soc15.h" #include /* @@ -163,6 +164,17 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch( return watch_address_cntl; } +static uint32_t kgd_aldebaran_trigger_pc_sample_trap(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst) +{ + return kgd_gfx_v9_trigger_pc_sample_trap(adev, vmid, 8, 4, + target_simd, target_wave_slot, method, inst); +} + const struct kfd2kgd_calls aldebaran_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -193,4 +205,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, .hqd_reset = kgd_gfx_v9_hqd_reset, + .trigger_pc_sample_trap = kgd_aldebaran_trigger_pc_sample_trap, + .override_core_cg = kgd_gfx_v9_override_core_cg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 017e8a3013aaa..bb95ee172014b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include -#include #include #include #include "amdgpu.h" @@ -390,6 +389,18 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, return 0; } + +static uint32_t kgd_arcturus_trigger_pc_sample_trap(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst) +{ + return kgd_gfx_v9_trigger_pc_sample_trap(adev, vmid, 10, 4, + target_simd, target_wave_slot, method, inst); +} + const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -420,5 +431,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v9_hqd_reset + .hqd_reset = kgd_gfx_v9_hqd_reset, + .trigger_pc_sample_trap = kgd_arcturus_trigger_pc_sample_trap, + .override_core_cg = kgd_gfx_v9_override_core_cg }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 1ef758ac5076e..85e560df7f6b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -184,5 +184,6 @@ static const struct dma_fence_ops amdkfd_fence_ops = { .get_driver_name = amdkfd_fence_get_driver_name, .get_timeline_name = amdkfd_fence_get_timeline_name, .enable_signaling = amdkfd_fence_enable_signaling, + AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL .release = amdkfd_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index e2ae714a700f8..53b2df2a1637c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -509,6 +509,17 @@ static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint32_t kgd_v9_4_3_trigger_pc_sample_trap(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst) +{ + return kgd_gfx_v9_trigger_pc_sample_trap(adev, vmid, 8, 4, + target_simd, target_wave_slot, method, inst); +} + const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, @@ -543,5 +554,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v9_hqd_reset + .hqd_reset = kgd_gfx_v9_hqd_reset, + .trigger_pc_sample_trap = kgd_v9_4_3_trigger_pc_sample_trap, + .override_core_cg = kgd_gfx_v9_override_core_cg }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index c63528a4e8941..9e0e664dbc4f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -944,34 +944,34 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) * * @adev: Handle of device whose registers are to be read * @queue_idx: Index of queue in the queue-map bit-field - * @wave_cnt: Output parameter updated with number of waves in flight - * @vmid: Output parameter updated 
with VMID of queue whose wave count - * is being collected + * @queue_cnt: Stores the wave count and doorbell offset for an active queue * @inst: xcc's instance number on a multi-XCC setup */ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, - int *wave_cnt, int *vmid, uint32_t inst) + struct kfd_cu_occupancy *queue_cnt, uint32_t inst) { int pipe_idx; int queue_slot; unsigned int reg_val; - + unsigned int wave_cnt; /* * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID * parameters to read out waves in flight. Get VMID if there are * non-zero waves in flight. */ - *vmid = 0xFF; - *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + - queue_slot); - *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; - if (*wave_cnt != 0) - *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & - CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst)); + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); + wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (wave_cnt != 0) { + queue_cnt->wave_cnt += wave_cnt; + queue_cnt->doorbell_off = + (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) & + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + } } /** @@ -981,9 +981,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * or more queues running and submitting waves to compute units. * * @adev: Handle of device from which to get number of waves in flight - * @pasid: Identifies the process for which this query call is invoked - * @pasid_wave_cnt: Output parameter updated with number of waves in flight that - * belong to process with given pasid + * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset + * for comparison later. * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * @inst: xcc's instance number on a multi-XCC setup @@ -1011,34 +1010,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * number of waves that are in flight for the queue at specified index. The * index ranges from 0 to 7. * - * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID - * of the wave(s). + * If non-zero waves are in flight, store the corresponding doorbell offset + * of the queue, along with the wave count. * - * Determine if VMID from above step maps to pasid provided as parameter. If - * it matches agrregate the wave count. That the VMID will not match pasid is - * a normal condition i.e. a device is expected to support multiple queues - * from multiple proceses. + * Determine if the queue belongs to the process by comparing the doorbell + * offset against the process's queues. If it matches, aggregate the wave + * count for the process. 
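+ * + * Matching on the doorbell offset rather than on VMID removes the need to + * translate VMID to PASID through the IH_VMID_0_LUT registers; a doorbell + * offset identifies a user queue directly, regardless of which VMID the + * queue currently runs under.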
* * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst) { int qidx; - int vmid; int se_idx; - int sh_idx; int se_cnt; - int sh_cnt; - int wave_cnt; int queue_map; - int pasid_tmp; int max_queue_cnt; - int vmid_wave_cnt = 0; DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0, inst); + soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst)); /* * Iterate through the shader engines and arrays of the device @@ -1048,51 +1041,38 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, AMDGPU_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - sh_cnt = adev->gfx.config.max_sh_per_se; se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { - for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); + queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS); + + /* + * Assumption: the queue map encodes the following schema: four + * pipes per micro-engine, with each pipe mapping + * eight queues. This schema holds for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + /* Skip queues that are not associated with + * compute functions + */ + if (!test_bit(qidx, cp_queue_bitmap)) + continue; - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + if (!(queue_map & (1 << qidx))) + continue; - /* - * Assumption: queue map encodes following schema: four - * pipes per each micro-engine, with each pipe mapping - * eight queues. 
This schema is true for GFX9 devices - * and must be verified for newer device families - */ - for (qidx = 0; qidx < max_queue_cnt; qidx++) { - - /* Skip qeueus that are not associated with - * compute functions - */ - if (!test_bit(qidx, cp_queue_bitmap)) - continue; - - if (!(queue_map & (1 << qidx))) - continue; - - /* Get number of waves in flight and aggregate them */ - get_wave_count(adev, qidx, &wave_cnt, &vmid, - inst); - if (wave_cnt != 0) { - pasid_tmp = - RREG32(SOC15_REG_OFFSET(OSSSYS, inst, - mmIH_VMID_0_LUT) + vmid); - if (pasid_tmp == pasid) - vmid_wave_cnt += wave_cnt; - } - } + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &cu_occupancy[qidx], + inst); } } amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); - soc15_grbm_select(adev, 0, 0, 0, 0, inst); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ - *pasid_wave_cnt = vmid_wave_cnt; *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * adev->gfx.cu_info.max_waves_per_simd; } @@ -1243,6 +1223,97 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, return queue_addr; } +static uint32_t kgd_aldebaran_get_hosttrap_status(struct amdgpu_device *adev, + uint32_t inst) +{ + uint32_t sq_hosttrap_status = 0x0; + int i, j; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, inst); + sq_hosttrap_status = RREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_HOSTTRAP_STATUS); + + if (sq_hosttrap_status & SQ_HOSTTRAP_STATUS__HTPENDING_OVERRIDE_MASK) { + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_HOSTTRAP_STATUS, + SQ_HOSTTRAP_STATUS__HTPENDING_OVERRIDE_MASK); + sq_hosttrap_status = 0x0; + continue; + } + if (sq_hosttrap_status) + goto out; + } + } + +out: + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, inst); + mutex_unlock(&adev->grbm_idx_mutex); + + return sq_hosttrap_status; +} + +void kgd_gfx_v9_override_core_cg(struct amdgpu_device *adev, + uint32_t value, + uint32_t inst) +{ + uint32_t clk_cntl = 0; + + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, inst); + + RREG32_SOC15(GC, GET_INST(GC, inst), mmCGTT_SQ_CLK_CTRL); + clk_cntl = REG_SET_FIELD(clk_cntl, CGTT_SQ_CLK_CTRL, CORE_OVERRIDE, value); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCGTT_SQ_CLK_CTRL, clk_cntl); + + mutex_unlock(&adev->grbm_idx_mutex); +} + +uint32_t kgd_gfx_v9_trigger_pc_sample_trap(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t max_wave_slot, + uint32_t max_simd, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst) +{ + if (method == KFD_IOCTL_PCS_METHOD_HOSTTRAP) { + uint32_t value = 0; + uint32_t sq_hosttrap_status = 0x0; + + sq_hosttrap_status = kgd_aldebaran_get_hosttrap_status(adev, inst); + /* skip when last host trap request is still pending to complete */ + if (sq_hosttrap_status) + return 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, SQ_IND_CMD_CMD_TRAP); + value = REG_SET_FIELD(value, SQ_CMD, MODE, SQ_IND_CMD_MODE_SINGLE); + + /* select *target_simd */ + value = REG_SET_FIELD(value, SQ_CMD, SIMD_ID, *target_simd); + /* select *target_wave_slot */ + value = REG_SET_FIELD(value, SQ_CMD, WAVE_ID, (*target_wave_slot)++); + /* set TrapID 4 for HOSTTRAP */ + value = REG_SET_FIELD(value, SQ_CMD, DATA, 0x4); + + 
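The hunk resumes below with the SQ_CMD write under grbm_idx_mutex and the arithmetic that advances the sampling target. That advance logic is easiest to see in isolation; here is a standalone sketch (the main() and the printout are illustrative only) using the limits the Arcturus wrapper above passes, max_wave_slot = 10 and max_simd = 4.

#include <stdio.h>
#include <stdint.h>

/*
 * Mirror of the slot-advance arithmetic: the wave slot is consumed,
 * then incremented and wrapped at max_slot; the SIMD index advances
 * each time the slot wraps back to zero.
 */
static void advance(uint32_t *simd, uint32_t *slot,
		    uint32_t max_simd, uint32_t max_slot)
{
	(*slot)++;
	*slot %= max_slot;
	if (*slot == 0) {
		(*simd)++;
		*simd %= max_simd;
	}
}

int main(void)
{
	uint32_t simd = 0, slot = 0;

	for (int i = 0; i < 12; i++) {
		printf("sample %2d -> simd %u, wave slot %u\n", i, simd, slot);
		advance(&simd, &slot, 4, 10);
	}
	return 0;
}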
mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, inst); + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, value); + mutex_unlock(&adev->grbm_idx_mutex); + + *target_wave_slot %= max_wave_slot; + if (!(*target_wave_slot)) { + (*target_simd)++; + *target_simd %= max_simd; + } + } else { + pr_debug("PC Sampling method %d not supported.", method); + return -EOPNOTSUPP; + } + return 0; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 988c50ac3be01..ad9b26be3bb4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); @@ -110,3 +111,14 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t queue_id, uint32_t inst, unsigned int utimeout); +uint32_t kgd_gfx_v9_trigger_pc_sample_trap(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t max_wave_slot, + uint32_t max_simd, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst); +void kgd_gfx_v9_override_core_cg(struct amdgpu_device *adev, + uint32_t value, + uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d5fd371d5ce8..105c3829bb98c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -36,6 +36,7 @@ #include "amdgpu_hmm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" +#include "kfd_ipc.h" #include #include "amdgpu_xgmi.h" #include "kfd_priv.h" @@ -222,7 +223,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) { + vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size) + + atomic64_read(&adev->kfd.vram_pinned))) { ret = -ENOMEM; goto release; } @@ -330,7 +332,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev, ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, - ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0); + ttm_bo_type_sg, amdkcl_ttm_resvp(&mem->bo->tbo), &gem_obj, 0); amdgpu_bo_unreserve(mem->bo); @@ -365,7 +367,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, * table update and TLB flush here directly. 
*/ replacement = dma_fence_get_stub(); - dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, + dma_resv_replace_fences(amdkcl_ttm_resvp(&bo->tbo), ef->base.context, replacement, DMA_RESV_USAGE_BOOKKEEP); dma_fence_put(replacement); return 0; @@ -399,9 +401,9 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) ef = container_of(dma_fence_get(&info->eviction_fence->base), struct amdgpu_amdkfd_fence, base); - BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); + BUG_ON(!dma_resv_trylock(amdkcl_ttm_resvp(&bo->tbo))); ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); - dma_resv_unlock(bo->tbo.base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(&bo->tbo)); dma_fence_put(&ef->base); return ret; @@ -446,11 +448,11 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, if (ret) goto unreserve_out; - ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(&bo->tbo), 1); if (ret) goto unreserve_out; - dma_resv_add_fence(bo->tbo.base.resv, fence, + dma_resv_add_fence(amdkcl_ttm_resvp(&bo->tbo), fence, DMA_RESV_USAGE_BOOKKEEP); unreserve_out: @@ -818,18 +820,26 @@ static int kfd_mem_export_dmabuf(struct kgd_mem *mem) if (!mem->dmabuf) { struct amdgpu_device *bo_adev; struct dma_buf *dmabuf; +#ifndef HAVE_DRM_GEM_PRIME_HANDLE_TO_DMABUF int r, fd; bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, mem->gem_handle, - mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, &fd); if (r) return r; dmabuf = dma_buf_get(fd); close_fd(fd); - if (WARN_ON_ONCE(IS_ERR(dmabuf))) +#else + bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file, + mem->gem_handle, + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DRM_RDWR : 0); +#endif + if (IS_ERR(dmabuf)) return PTR_ERR(dmabuf); mem->dmabuf = dmabuf; } @@ -837,6 +847,7 @@ static int kfd_mem_export_dmabuf(struct kgd_mem *mem) return 0; } +#ifdef AMDKCL_AMDGPU_DMABUF_OPS static int kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo) @@ -857,6 +868,39 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, return 0; } +#endif + +/** + * @kfd_mem_attach_vram_bo: Acquires a handle to a VRAM BO that can + * be used to give a peer GPU access to it + * + * The implementation determines whether access to a VRAM BO employs the + * DMABUF or the Shared BO mechanism. The DMABUF mechanism is used when the + * kernel has the config option HSA_AMD_P2P enabled; the Shared BO mechanism + * is used when that option is not set. It is important to note that a Shared + * BO cannot be used to enable peer access if the system has an IOMMU enabled + * + * @TODO: Add a check to ensure the IOMMU is not enabled. Should this check
live elsewhere, since this information could be useful in other places? + */ +static int kfd_mem_attach_vram_bo(struct amdgpu_device *adev, + struct kgd_mem *mem, struct amdgpu_bo **bo, + struct kfd_mem_attachment *attachment) +{ + int ret = 0; + +#ifdef CONFIG_HSA_AMD_P2P + attachment->type = KFD_MEM_ATT_DMABUF; + ret = kfd_mem_attach_dmabuf(adev, mem, bo); + pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); +#else + *bo = mem->bo; + attachment->type = KFD_MEM_ATT_SHARED; + drm_gem_object_get(&(*bo)->tbo.base); + pr_debug("Employ Shared BO mechanism to enable peer GPU access\n"); +#endif + return ret; +} /* kfd_mem_attach - Add a BO to a VM * @@ -943,22 +987,41 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL || mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP), "Handing invalid SG BO in ATTACH request"); - attachment[i]->type = KFD_MEM_ATT_SG; - ret = create_dmamap_sg_bo(adev, mem, &bo[i]); - if (ret) - goto unwind; - /* Enable acces to GTT and VRAM BOs of peer devices */ - } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || - mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { + + if (kcl_has_dma_map_resource_ops(adev->dev)) { + attachment[i]->type = KFD_MEM_ATT_SG; + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); + if (ret) + goto unwind; + } else { + attachment[i]->type = KFD_MEM_ATT_SHARED; + bo[i] = mem->bo; + drm_gem_object_get(&bo[i]->tbo.base); + } +#ifdef AMDKCL_AMDGPU_DMABUF_OPS + /* Enable access to GTT BOs of peer devices */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT) { attachment[i]->type = KFD_MEM_ATT_DMABUF; ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); if (ret) goto unwind; pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); +#endif + /* Enable peer access to VRAM BOs */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { + ret = kfd_mem_attach_vram_bo(adev, mem, + &bo[i], attachment[i]); + if (ret) + goto unwind; } else { +#ifdef AMDKCL_AMDGPU_DMABUF_OPS WARN_ONCE(true, "Handling invalid ATTACH request"); ret = -EINVAL; goto unwind; +#endif + attachment[i]->type = KFD_MEM_ATT_SHARED; + bo[i] = mem->bo; + drm_gem_object_get(&bo[i]->tbo.base); } /* Add BO to VM internal data structures */ @@ -1092,6 +1155,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { if (ret == -EAGAIN) @@ -1100,6 +1164,29 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; } +#else + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo, mem->user_pages, NULL); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); +#endif ret = amdgpu_bo_reserve(bo, true); if (ret) { @@ -1113,7 +1200,15 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); +#else + if (ret) + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, NULL); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +#endif unregister_out: if (ret) amdgpu_hmm_unregister(bo); @@ -1356,7 +1451,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, vm_list_node) { struct amdgpu_bo *pd = peer_vm->root.bo; - ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv, + ret = amdgpu_sync_resv(NULL, sync, amdkcl_ttm_resvp(&pd->tbo), AMDGPU_SYNC_NE_OWNER, AMDGPU_FENCE_OWNER_KFD); if (ret) @@ -1432,10 +1527,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(&vm->root.bo->tbo), 1); if (ret) goto reserve_shared_fail; - dma_resv_add_fence(vm->root.bo->tbo.base.resv, + dma_resv_add_fence(amdkcl_ttm_resvp(&vm->root.bo->tbo), &vm->process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(vm->root.bo); @@ -1480,7 +1575,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, * * Return: ZERO if successful in pinning, Non-Zero in case of error. */ -static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) +int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) { int ret = 0; @@ -1505,11 +1600,16 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) } } - ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0); + ret = amdgpu_bo_pin(bo, domain); if (ret) pr_err("Error in Pinning BO to domain: %d\n", domain); amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); + + if (!ret && bo->tbo.resource->mem_type == TTM_PL_VRAM) + atomic64_add(amdgpu_bo_size(bo), + &amdgpu_ttm_adev(bo->tbo.bdev)->kfd.vram_pinned); + out: amdgpu_bo_unreserve(bo); return ret; @@ -1523,7 +1623,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain) * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their * PIN count decremented. 
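The pin path above adds the size of every VRAM pin to a new adev->kfd.vram_pinned counter, and the unpin path below subtracts it again. The limit and availability checks elsewhere in this patch subtract the driver-wide vram_pin_size but add kfd.vram_pinned back, presumably because KFD's own usage counters already cover its pinned buffers and they would otherwise be counted twice. A standalone sketch of the resulting arithmetic, with purely illustrative numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative values, in MiB */
	uint64_t vram_size       = 16384;
	uint64_t kfd_vram_used   = 4096; /* includes KFD's pinned buffers */
	uint64_t vram_pin_size   = 1024; /* all pinned VRAM, driver-wide */
	uint64_t kfd_vram_pinned = 512;  /* the subset pinned through KFD */
	uint64_t reserved_pt     = 256;
	uint64_t reserved_ras    = 128;

	/* Adding kfd_vram_pinned back avoids double-subtraction. */
	uint64_t avail = vram_size - kfd_vram_used - vram_pin_size
		       + kfd_vram_pinned - reserved_pt - reserved_ras;

	printf("available for new KFD allocations: %llu MiB\n",
	       (unsigned long long)avail); /* prints 11392 */
	return 0;
}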
Calls to UNPIN must balance calls to PIN */ -static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) +void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) { int ret = 0; @@ -1532,6 +1632,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) return; amdgpu_bo_unpin(bo); + + if (bo->tbo.resource->mem_type == TTM_PL_VRAM) + atomic64_sub(amdgpu_bo_size(bo), + &amdgpu_ttm_adev(bo->tbo.bdev)->kfd.vram_pinned); + amdgpu_bo_unreserve(bo); } @@ -1691,6 +1796,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) - adev->kfd.vram_used_aligned[xcp_id] - atomic64_read(&adev->vram_pin_size) + + atomic64_read(&adev->kfd.vram_pinned) - reserved_for_pt - reserved_for_ras; @@ -1953,9 +2059,20 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Cleanup user pages and MMU notifiers */ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { amdgpu_hmm_unregister(mem->bo); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED mutex_lock(&process_info->notifier_lock); amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); mutex_unlock(&process_info->notifier_lock); +#else + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } +#endif } ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); @@ -2000,9 +2117,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( *size = 0; } + /* Unreference the ipc_obj if applicable */ + kfd_ipc_obj_put(&mem->ipc_obj); + /* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + if (!mem->ipc_obj) + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); if (mem->dmabuf) { dma_buf_put(mem->dmabuf); mem->dmabuf = NULL; @@ -2375,6 +2496,203 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, return 0; } +struct amdgpu_bo *amdgpu_amdkfd_gpuvm_get_bo_ref(struct kgd_mem *mem, + uint32_t *flags) +{ + struct amdgpu_bo *bo = mem->bo; + + if (flags) + *flags = mem->alloc_flags; + drm_gem_object_get(&bo->tbo.base); + return bo; +} + +void amdgpu_amdkfd_gpuvm_put_bo_ref(struct amdgpu_bo *bo) +{ + drm_gem_object_put(&bo->tbo.base); +} + +#define AMD_GPU_PAGE_SHIFT PAGE_SHIFT +#define AMD_GPU_PAGE_SIZE (_AC(1, UL) << AMD_GPU_PAGE_SHIFT) + +/** + * @get_sg_table_of_mmio_or_doorbel_bo - Builds and returns an instance + * of scatter gather table (sg_table) for a MMIO/DOORBELL BO. An example + * of this is the MMIO BO that's used to surface HDP registers. + * + * @note: This method will only work as long as the address encapsulated + * by MMIO/DOORBELL BO is not a DMA mapped address + * + * The method does the following: + * Acquire address to use in building scatterlist nodes + * Acquire size of memory to use in building scatterlist nodes + * Invoke DMA Map service to obtain DMA mapped address + * Access sg_table construction service with above parameters + * Return the handle of scatter gather table + * + * @adev: GPU device whose MMIO/DOORBELL BO is being exported + * @bo: Handle of MMIO/DOORBELL BO e.g. 
HDP registers + * @dma_dev: Handle of peer PCIe device that wishes to access + * @dir: Direction of data movement from peer PCIe devices perspective + * + * @sgt: Output parameter that is built and returned + * + * Return: zero if successful, non-zero otherwise + */ +static int get_sg_table_of_mmio_or_doorbel_bo(struct amdgpu_bo *bo, + struct device *dma_dev, enum dma_data_direction dir, + struct sg_table **sgt) +{ + dma_addr_t dma_addr; + s32 size, ret; + u64 addr; + + /* Acquire the address of MMIO or DOORBELL BO being + * exported. By policy the entire backing memory is + * encapsulated in one scatterlist node + */ + size = bo->tbo.sg->sgl->length; + addr = bo->tbo.sg->sgl->dma_address; + pr_debug("MMIO/Doorbell address being exported: %llx\n", addr); + + /* DMA map the acquired address - MMIO or DOORBELL */ + dma_addr = dma_map_resource(dma_dev, addr, size, + dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_mapping_error(dma_dev, dma_addr); + if (ret) + return ret; + + /* Update output parameter with a new sg_table */ + pr_debug("MMIO/Doorbell BO size: %d\n", size); + pr_debug("MMIO/Doorbell's DMA Address: %llx\n", dma_addr); + *sgt = create_sg_table(dma_addr, size); + return (*sgt) ? 0 : -ENOMEM; +} + +int amdgpu_amdkfd_gpuvm_get_sg_table(struct amdgpu_device *adev, + struct amdgpu_bo *bo, uint32_t flags, + uint64_t offset, uint64_t size, + struct device *dma_dev, enum dma_data_direction dir, + struct sg_table **ret_sg) +{ + struct sg_table *sg = NULL; + struct scatterlist *s; + struct page **pages; + uint64_t offset_in_page; + unsigned int page_size; + unsigned int cur_page; + unsigned int chunks; + unsigned int idx; + int ret; + + /* Determine access does not cross memory boundary */ + if (size + offset > amdgpu_bo_size(bo)) + return -EFAULT; + + /* For GPU memory use VRAM Mgr to build SG Table */ + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) { + ret = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, offset, + size, dma_dev, dir, &sg); + *ret_sg = (ret == 0) ? sg : NULL; + return ret; + } + + /* Handle BO (type: ttm_bo_type_sg) that is used to surface + * resources from MMIO address space. The allocation flag of + * BO fall in MMIO_REMAP / DOORBELL domain + */ + if (bo->tbo.type == ttm_bo_type_sg && + ((flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || + (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { + ret = get_sg_table_of_mmio_or_doorbel_bo(bo, dma_dev, dir, &sg); + *ret_sg = (ret == 0) ? sg : NULL; + return ret; + } + + /* Handle BO (type: ttm_bo_type_device) that is used to surface + * memory resources from GPU's GART aperture. The allocation flag + * of BO falls in GTT domain i.e. the physical backing memory is + * part of system memory. 
Construction of SG Table proceeds + * as follows: + * + * Allocate memory for SG Table + * Determine number of Scatterlist node in table + * Logic uses one Scatterlist node per PAGE_SIZE + * Allocate memory for Scatterlist nodes + * Initialize Scatterlist nodes to zero length + * Walk down system memory pointed by BO while + * Updating Scatterlist nodes with system memory info + */ + + sg = kmalloc(sizeof(*sg), GFP_KERNEL); + if (!sg) { + ret = -ENOMEM; + goto out; + } + + page_size = PAGE_SIZE; + offset_in_page = offset & (page_size - 1); + chunks = (size + offset_in_page + page_size - 1) + / page_size; + + ret = sg_alloc_table(sg, chunks, GFP_KERNEL); + if (unlikely(ret)) + goto out; + + for_each_sgtable_sg(sg, s, idx) + s->length = 0; + + pages = bo->tbo.ttm->pages; + cur_page = offset / page_size; + for_each_sg(sg->sgl, s, sg->orig_nents, idx) { + uint64_t chunk_size, length; + + chunk_size = page_size - offset_in_page; + length = min(size, chunk_size); + + sg_set_page(s, pages[cur_page], length, offset_in_page); + s->dma_address = page_to_phys(pages[cur_page]); + s->dma_length = length; + + size -= length; + offset_in_page = 0; + cur_page++; + } + + if (dma_dev) { + ret = dma_map_sgtable(dma_dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); + if (ret) + goto out_of_range; + } + + *ret_sg = sg; + return 0; + +out_of_range: + sg_free_table(sg); +out: + kfree(sg); + *ret_sg = NULL; + return ret; +} + +void amdgpu_amdkfd_gpuvm_put_sg_table(struct amdgpu_bo *bo, + struct device *dma_dev, enum dma_data_direction dir, + struct sg_table *sgt) +{ + /* Unmap GPU device memory */ + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) { + amdgpu_vram_mgr_free_sgt(dma_dev, dir, sgt); + return; + } + + /* Unmap system memory */ + if (dma_dev) + dma_unmap_sgtable(dma_dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(sgt); + kfree(sgt); +} + static int import_obj_create(struct amdgpu_device *adev, struct dma_buf *dma_buf, struct drm_gem_object *obj, @@ -2408,12 +2726,14 @@ static int import_obj_create(struct amdgpu_device *adev, INIT_LIST_HEAD(&(*mem)->attachments); mutex_init(&(*mem)->lock); - - (*mem)->alloc_flags = - ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) - | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE - | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; + if (bo->kfd_bo) + (*mem)->alloc_flags = bo->kfd_bo->alloc_flags; + else + (*mem)->alloc_flags = + ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) + | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE + | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; get_dma_buf(dma_buf); (*mem)->dmabuf = dma_buf; @@ -2448,6 +2768,45 @@ static int import_obj_create(struct amdgpu_device *adev, return ret; } +int amdgpu_amdkfd_gpuvm_import_ipcobj(struct amdgpu_device *adev, + struct dma_buf *dma_buf, + struct kfd_ipc_obj *ipc_obj, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct drm_gem_object *obj; + int ret; + + if (WARN_ON(!ipc_obj)) + return -EINVAL; + +#ifdef AMDKCL_AMDGPU_DMABUF_OPS + obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf); + if (IS_ERR(obj)) + return PTR_ERR(obj); +#else + obj = dma_buf->priv; + if (drm_to_adev(obj->dev) != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + drm_gem_object_get(obj); +#endif + + ret = import_obj_create(adev, dma_buf, obj, va, drm_priv, mem, size, + mmap_offset); + if (ret) + goto err_put_obj; + + (*mem)->ipc_obj = ipc_obj; + + return 0; + +err_put_obj: + drm_gem_object_put(obj); + return ret; +} + int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, @@ -2500,6 +2859,43 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, return ret; } +int amdgpu_amdkfd_gpuvm_export_ipc_obj(struct amdgpu_device *adev, void *vm, + struct kgd_mem *mem, + struct kfd_ipc_obj **ipc_obj, + uint32_t flags, + uint32_t *restore_handle) +{ + struct dma_buf *dmabuf; + int r = 0; + + if (!adev || !vm || !mem) + return -EINVAL; + + mutex_lock(&mem->lock); + + if (mem->ipc_obj) { + *ipc_obj = mem->ipc_obj; + goto unlock_out; + } + + r = kfd_mem_export_dmabuf(mem); + if (r) + goto unlock_out; + + get_dma_buf(mem->dmabuf); + dmabuf = mem->dmabuf; + + r = kfd_ipc_store_insert(dmabuf, &mem->ipc_obj, flags, restore_handle); + if (r) + dma_buf_put(dmabuf); + else + *ipc_obj = mem->ipc_obj; + +unlock_out: + mutex_unlock(&mem->lock); + return r; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -2510,9 +2906,16 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, * restore, where we get updated page addresses. This function only * ensures that GPU access to the BO is stopped. 
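amdgpu_amdkfd_gpuvm_export_ipc_obj() in the hunk above is an export-once pattern: the first caller creates the dmabuf and publishes it to the IPC store under mem->lock, and every later caller gets the same handle back. A self-contained model of that control flow; all names here are hypothetical, and publish() stands in for kfd_mem_export_dmabuf() plus kfd_ipc_store_insert().

#include <pthread.h>
#include <stdlib.h>

struct ipc_obj { int id; };

struct kmem {
	pthread_mutex_t lock;
	struct ipc_obj *ipc_obj;	/* non-NULL once exported */
};

/* Stand-in for the dmabuf export and IPC-store insertion. */
static struct ipc_obj *publish(struct kmem *m)
{
	(void)m;
	return calloc(1, sizeof(struct ipc_obj));
}

static int export_once(struct kmem *m, struct ipc_obj **out)
{
	int r = 0;

	pthread_mutex_lock(&m->lock);
	if (!m->ipc_obj) {
		m->ipc_obj = publish(m);
		if (!m->ipc_obj)
			r = -1;		/* export failed */
	}
	if (!r)
		*out = m->ipc_obj;	/* first and repeat callers share one handle */
	pthread_mutex_unlock(&m->lock);
	return r;
}

int main(void)
{
	struct kmem m = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct ipc_obj *h1, *h2;

	if (export_once(&m, &h1) || export_once(&m, &h2))
		return 1;
	return h1 == h2 ? 0 : 1;
}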
*/ +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem) { + struct mm_struct *mm = mni->mm; +#else +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + struct mm_struct *mm) +{ +#endif struct amdkfd_process_info *process_info = mem->process_info; int r = 0; @@ -2523,12 +2926,14 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, return 0; mutex_lock(&process_info->notifier_lock); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED mmu_interval_set_seq(mni, cur_seq); +#endif mem->invalid++; if (++process_info->evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mni->mm, + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); @@ -2578,8 +2983,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); mem->range = NULL; +#endif /* BO reservations and getting user pages (hmm_range_fault) * must happen outside the notifier lock @@ -2602,6 +3009,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } } +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &mem->range); @@ -2620,9 +3028,35 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = 0; } +#else + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo, mem->user_pages, NULL); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } +#endif mutex_lock(&process_info->notifier_lock); - /* Mark the BO as valid unless it was invalidated * again concurrently. */ @@ -2631,7 +3065,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, goto unlock_out; } /* set mem valid if mem has hmm range associated */ +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED if (mem->range) +#endif mem->invalid = 0; } @@ -2696,6 +3132,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* Validate the BO if we got user pages */ if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); @@ -2706,6 +3143,28 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } +#else + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages && mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_bo_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. 
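The #else branches in these userptr hunks recreate the pre-HMM flow, in which the driver owns a private pages array until validation hands the pages over to TTM. A condensed kernel-style sketch of the refill step follows; it is illustrative only, get_pages() stands in for amdgpu_ttm_tt_get_user_pages(), and the caller is expected to treat a failure the way the hunk above does, by leaving the BO invalid and letting a later VM fault report it.

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>

/* (Re)allocate the private array, drop stale pages, then repopulate. */
static int refill_user_pages(struct page ***user_pages, unsigned long npages,
			     int (*get_pages)(struct page **pages))
{
	int ret;

	if (!*user_pages) {
		*user_pages = kvmalloc_array(npages, sizeof(struct page *),
					     GFP_KERNEL | __GFP_ZERO);
		if (!*user_pages)
			return -ENOMEM;
	} else if ((*user_pages)[0]) {
		/* Stale pages from a previous attempt: release them first. */
		release_pages(*user_pages, npages);
	}

	ret = get_pages(*user_pages);
	if (ret)
		(*user_pages)[0] = NULL;	/* mark the array as empty */
	return ret;
}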
+ */ + if (mem->user_pages) { + kvfree(mem->user_pages); + mem->user_pages = NULL; + } +#endif + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -2752,6 +3211,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, validate_list) { +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED bool valid; /* keep mem without hmm range at userptr_inval_list */ @@ -2768,6 +3228,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i ret = -EAGAIN; continue; } +#endif if (mem->invalid) { WARN(1, "Valid BO is marked invalid"); @@ -2866,7 +3327,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) put_task_struct(usertask); } -static void replace_eviction_fence(struct dma_fence __rcu **ef, +static void replace_eviction_fence(struct dma_fence **ef, struct dma_fence *new_ef) { struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true @@ -2901,7 +3362,7 @@ static void replace_eviction_fence(struct dma_fence __rcu **ef, * 7. Add fence to all PD and PT BOs. * 8. Unreserve all BOs */ -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) { struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; @@ -2970,7 +3431,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } - dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + dma_resv_for_each_fence(&cursor, amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_KERNEL, fence) { ret = amdgpu_sync_fence(&sync_obj, fence); if (ret) { @@ -3077,7 +3538,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * if (mem->bo->tbo.pin_count) continue; - dma_resv_add_fence(mem->bo->tbo.base.resv, + dma_resv_add_fence(amdkcl_ttm_resvp(&mem->bo->tbo), &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); } @@ -3086,7 +3547,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; - dma_resv_add_fence(bo->tbo.base.resv, + dma_resv_add_fence(amdkcl_ttm_resvp(&bo->tbo), &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); } @@ -3138,10 +3599,10 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. */ - ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(&gws_bo->tbo), 1); if (ret) goto reserve_shared_fail; - dma_resv_add_fence(gws_bo->tbo.base.resv, + dma_resv_add_fence(amdkcl_ttm_resvp(&gws_bo->tbo), &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(gws_bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c new file mode 100644 index 0000000000000..037e9aea2b691 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c @@ -0,0 +1,106 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_object.h" +#include "amdgpu_amdkfd.h" +#include +#include "amdgpu_ids.h" + +void amdgpu_amdkfd_rlc_spm_cntl(struct amdgpu_device *adev, bool cntl) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + spin_lock(&adev->gfx.kiq[0].ring_lock); + amdgpu_ring_alloc(kiq_ring, adev->gfx.spmfuncs->set_spm_config_size); + if (cntl) + adev->gfx.spmfuncs->start(adev); + else + adev->gfx.spmfuncs->stop(adev); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq[0].ring_lock); +} + +void amdgpu_amdkfd_rlc_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + spin_lock(&adev->gfx.kiq[0].ring_lock); + amdgpu_ring_alloc(kiq_ring, adev->gfx.spmfuncs->set_spm_config_size); + adev->gfx.spmfuncs->set_rdptr(adev, rptr); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq[0].ring_lock); +} + +int amdgpu_amdkfd_rlc_spm_acquire(struct amdgpu_device *adev, struct amdgpu_vm *vm, u64 gpu_addr, u32 size) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + int r = 0; + + if (!adev->gfx.rlc.funcs->update_spm_vmid) + return -EINVAL; + + if (!vm->reserved_vmid[AMDGPU_GFXHUB(0)]) { + r = amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); + if (r) + return r; + vm->reserved_vmid[AMDGPU_GFXHUB(0)] = true; + } + + /* init spm vmid with 0x0 */ + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0); + + /* set spm ring registers */ + spin_lock(&adev->gfx.kiq[0].ring_lock); + amdgpu_ring_alloc(kiq_ring, adev->gfx.spmfuncs->set_spm_config_size); + adev->gfx.spmfuncs->set_spm_perfmon_ring_buf(adev, gpu_addr, size); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq[0].ring_lock); + return r; +} + +void amdgpu_amdkfd_rlc_spm_release(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + /* stop spm stream and interrupt */ + spin_lock(&adev->gfx.kiq[0].ring_lock); + amdgpu_ring_alloc(kiq_ring, adev->gfx.spmfuncs->set_spm_config_size); + adev->gfx.spmfuncs->stop(adev); + amdgpu_ring_commit(kiq_ring); + spin_unlock(&adev->gfx.kiq[0].ring_lock); + + if (vm->reserved_vmid[AMDGPU_GFXHUB(0)]) { + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); + vm->reserved_vmid[AMDGPU_GFXHUB(0)] = false; + } + + /* revert spm vmid to 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); +} + +void 
amdgpu_amdkfd_rlc_spm_interrupt(struct amdgpu_device *adev) +{ + if (adev->kfd.dev) + kgd2kfd_spm_interrupt(adev->kfd.dev); +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 0c8975ac5af9e..7abbec85fb6ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1145,8 +1145,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, return 0; } -void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, - u32 eng_clock, u32 mem_clock) +int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, + u32 eng_clock, u32 mem_clock) { SET_ENGINE_CLOCK_PS_ALLOCATION args; int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings); @@ -1161,8 +1161,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, if (mem_clock) args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + return amdgpu_atom_execute_table(adev->mode_info.atom_context, index, + (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, @@ -1799,6 +1799,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, NULL); +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS static struct attribute *amdgpu_vbios_version_attrs[] = { &dev_attr_vbios_version.attr, NULL @@ -1816,6 +1817,7 @@ int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev) return 0; } +#endif /** * amdgpu_atombios_fini - free the driver info and callbacks for atombios @@ -1836,6 +1838,9 @@ void amdgpu_atombios_fini(struct amdgpu_device *adev) adev->mode_info.atom_context = NULL; kfree(adev->mode_info.atom_card_info); adev->mode_info.atom_card_info = NULL; +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + device_remove_file(adev->dev, &dev_attr_vbios_version); +#endif } /** @@ -1852,6 +1857,9 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) { struct card_info *atom_card_info = kzalloc(sizeof(struct card_info), GFP_KERNEL); +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + int ret; +#endif if (!atom_card_info) return -ENOMEM; @@ -1883,6 +1891,14 @@ int amdgpu_atombios_init(struct amdgpu_device *adev) amdgpu_atombios_allocate_fb_scratch(adev); } +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + ret = device_create_file(adev->dev, &dev_attr_vbios_version); + if (ret) { + DRM_ERROR("Failed to create device file for VBIOS version\n"); + return ret; + } +#endif + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 0811474e8fd33..442cc70474775 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -163,8 +163,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, bool strobe_mode, struct atom_mpll_param *mpll_param); -void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, - u32 eng_clock, u32 mem_clock); +int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, + u32 eng_clock, u32 mem_clock); bool amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev, @@ -214,6 +214,8 @@ int amdgpu_atombios_get_data_table(struct amdgpu_device *adev, void amdgpu_atombios_fini(struct amdgpu_device *adev); int amdgpu_atombios_init(struct amdgpu_device *adev); +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS int 
amdgpu_atombios_sysfs_init(struct amdgpu_device *adev); +#endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 375f020025797..3353c78a258aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -44,7 +44,9 @@ struct amdgpu_atpx { static struct amdgpu_atpx_priv { bool atpx_detected; +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE bool bridge_pm_usable; +#endif unsigned int quirks; /* handle for device - and atpx */ acpi_handle dhandle; @@ -89,18 +91,6 @@ bool amdgpu_is_atpx_hybrid(void) return amdgpu_atpx_priv.atpx.is_hybrid; } -bool amdgpu_atpx_dgpu_req_power_for_displays(void) -{ - return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays; -} - -#if defined(CONFIG_ACPI) -void *amdgpu_atpx_get_dhandle(void) -{ - return amdgpu_atpx_priv.dhandle; -} -#endif - /** * amdgpu_atpx_call - call an ATPX method * @@ -233,11 +223,18 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; } else { pr_notice("ATPX Hybrid Graphics\n"); +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE /* * Disable legacy PM methods only when pcie port PM is usable, * otherwise the device might fail to power off or power on. */ atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable; +#else + /* + * This is a temporary hack for the kernel doesn't support D3. + */ + atpx->functions.power_cntl = true; +#endif atpx->is_hybrid = true; } } @@ -616,16 +613,20 @@ static bool amdgpu_atpx_detect(void) struct pci_dev *pdev = NULL; bool has_atpx = false; int vga_count = 0; +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE bool d3_supported = false; struct pci_dev *parent_pdev; +#endif while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { vga_count++; has_atpx |= amdgpu_atpx_pci_probe_handle(pdev); +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; +#endif amdgpu_atpx_get_quirks(pdev); } @@ -634,8 +635,10 @@ static bool amdgpu_atpx_detect(void) has_atpx |= amdgpu_atpx_pci_probe_handle(pdev); +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; +#endif amdgpu_atpx_get_quirks(pdev); } @@ -644,7 +647,9 @@ static bool amdgpu_atpx_detect(void) pr_info("vga_switcheroo: detected switching method %s handle\n", acpi_method_name); amdgpu_atpx_priv.atpx_detected = true; +#ifdef AMDKCL_PCIE_BRIDGE_PM_USABLE amdgpu_atpx_priv.bridge_pm_usable = d3_supported; +#endif amdgpu_atpx_init(); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 42e64bce661e4..45affc02548c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size) * part of the system bios. On boot, the system bios puts a * copy of the igp rom at the start of vram if a discrete card is * present. + * For SR-IOV, the vbios image is also put in VRAM in the VF. 
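The restructuring below splits amdgpu_get_bios() into APU and dGPU probe chains, each trying VBIOS sources in a fixed order until one succeeds. The same idiom can be expressed as a probe table; the sketch below is standalone, the ordering matches the APU chain in the hunk, and the stub fetchers and their results are purely illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct amdgpu_device;

/* Stubs standing in for the real fetchers. */
static bool from_vfct(struct amdgpu_device *a)     { (void)a; return false; }
static bool from_vram_bar(struct amdgpu_device *a) { (void)a; return false; }
static bool from_rom_bar(struct amdgpu_device *a)  { (void)a; return false; }
static bool from_platform(struct amdgpu_device *a) { (void)a; return true; }

static const struct {
	bool (*probe)(struct amdgpu_device *);
	const char *source;
} apu_probes[] = {
	{ from_vfct,     "VFCT" },
	{ from_vram_bar, "VRAM BAR" },
	{ from_rom_bar,  "ROM BAR" },
	{ from_platform, "platform" },
};

static bool get_bios_apu(struct amdgpu_device *adev)
{
	for (size_t i = 0; i < sizeof(apu_probes) / sizeof(apu_probes[0]); i++) {
		if (apu_probes[i].probe(adev)) {
			printf("Fetched VBIOS from %s\n", apu_probes[i].source);
			return true;
		}
	}
	fprintf(stderr, "Unable to locate a BIOS ROM\n");
	return false;
}

int main(void)
{
	return get_bios_apu(NULL) ? 0 : 1;
}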
*/ -static bool igp_read_bios_from_vram(struct amdgpu_device *adev) +static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) { uint8_t __iomem *bios; resource_size_t vram_base; @@ -284,10 +285,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) acpi_status status; bool found = false; - /* ATRM is for the discrete card only */ - if (adev->flags & AMD_IS_APU) - return false; - /* ATRM is for on-platform devices only */ if (dev_is_removable(&adev->pdev->dev)) return false; @@ -343,11 +340,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return igp_read_bios_from_vram(adev); - else - return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? - false : amdgpu_asic_read_disabled_bios(adev); + return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? + false : amdgpu_asic_read_disabled_bios(adev); } #ifdef CONFIG_ACPI @@ -414,7 +408,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) } #endif -bool amdgpu_get_bios(struct amdgpu_device *adev) +static bool amdgpu_get_bios_apu(struct amdgpu_device *adev) +{ + if (amdgpu_acpi_vfct_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VFCT\n"); + goto success; + } + + if (amdgpu_read_bios_from_vram(adev)) { + dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); + goto success; + } + + if (amdgpu_read_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); + goto success; + } + + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + + dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); + return false; + +success: + return true; +} + +static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ATRM\n"); @@ -426,7 +449,8 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (igp_read_bios_from_vram(adev)) { + /* this is required for SR-IOV */ + if (amdgpu_read_bios_from_vram(adev)) { dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n"); goto success; } @@ -455,10 +479,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; return true; } +bool amdgpu_get_bios(struct amdgpu_device *adev) +{ + bool found; + + if (adev->flags & AMD_IS_APU) + found = amdgpu_get_bios_apu(adev); + else + found = amdgpu_get_bios_dgpu(adev); + + if (found) + adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; + + return found; +} + /* helper function for soc15 and onwards to read bios from rom */ bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 555cd6d877c30..47734741388fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -24,6 +24,7 @@ #define __AMDGPU_BO_LIST_H__ #include +#include struct hmm_range; @@ -40,7 +41,12 @@ struct amdgpu_bo_list_entry { uint32_t priority; struct page **user_pages; struct hmm_range *range; +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED bool user_invalidated; +#else + int user_invalidated; + struct ttm_validate_buffer tv; +#endif }; struct amdgpu_bo_list { @@ -55,8 +61,12 @@ struct amdgpu_bo_list { /* Protect access during command submission. 
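The amdgpu_bo_list.h hunk just below guards the __counted_by() annotation so the struct still builds where the attribute macro does not exist. An equivalent fallback, shown standalone here, is to define the attribute away rather than duplicate the member; this sketch is not the patch's approach, just the same compatibility idea in miniature.

#include <stddef.h>
#include <stdlib.h>

#ifndef __counted_by
#define __counted_by(member)	/* no-op where the attribute is unavailable */
#endif

struct entry { int id; };

struct entry_list {
	size_t num_entries;
	struct entry entries[] __counted_by(num_entries);
};

int main(void)
{
	struct entry_list *l = malloc(sizeof(*l) + 4 * sizeof(l->entries[0]));

	if (!l)
		return 1;
	l->num_entries = 4;	/* keep count and allocation in sync */
	free(l);
	return 0;
}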
*/ struct mutex bo_list_mutex; - +#ifdef __counted_by struct amdgpu_bo_list_entry entries[] __counted_by(num_entries); +#else + struct amdgpu_bo_list_entry entries[]; +#endif + }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 344e0a9ee08a9..2c9621a3a1fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -109,7 +109,11 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) case DRM_MODE_CONNECTOR_DVII: case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -117,7 +121,11 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; case DRM_MODE_CONNECTOR_DVID: case DRM_MODE_CONNECTOR_HDMIA: +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -126,7 +134,11 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) dig_connector = amdgpu_connector->con_priv; if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) || +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) connector->display_info.is_hdmi) { +#else + drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif if (connector->display_info.bpc) bpc = connector->display_info.bpc; } @@ -150,7 +162,11 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) break; } +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif /* * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at @@ -176,7 +192,11 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. 
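The clamp in the connectors hunk below trades color depth for TMDS bandwidth: a 12 bpc signal costs 3/2 of the base pixel clock and a 10 bpc signal costs 5/4, and the real code additionally requires the sink's DRM_EDID_HDMI_DC_30 capability before settling on 10 bpc. A worked standalone example of the arithmetic, with that capability check omitted:

#include <stdio.h>

int main(void)
{
	int mode_clock = 297000;	/* kHz, illustrative HDMI 1.4 4K mode */
	int max_tmds_clock = 340000;	/* kHz, HDMI 1.3+ limit */
	int bpc = 12;

	/* 297000 * 3/2 = 445500 > 340000, so 12 bpc does not fit;
	 * 297000 * 5/4 = 371250 > 340000, so 10 bpc does not fit either.
	 */
	if (bpc == 12 && mode_clock * 3 / 2 > max_tmds_clock)
		bpc = (mode_clock * 5 / 4 <= max_tmds_clock) ? 10 : 8;

	printf("clamped to %d bpc\n", bpc);	/* prints 8 */
	return 0;
}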
Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { +#ifndef HAVE_DRM_DISPLAY_INFO_EDID_HDMI_RGB444_DC_MODES + if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && +#else if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && +#endif (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else @@ -219,10 +239,17 @@ amdgpu_connector_update_scratch_regs(struct drm_connector *connector, struct drm_encoder *encoder; const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; bool connected; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif best_encoder = connector_funcs->best_encoder(connector); +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif if ((encoder == best_encoder) && (status == connector_status_connected)) connected = true; else @@ -237,8 +264,15 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, int encoder_type) { struct drm_encoder *encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif if (encoder->encoder_type == encoder_type) return encoder; } @@ -323,9 +357,16 @@ static struct drm_encoder * amdgpu_connector_best_single_encoder(struct drm_connector *connector) { struct drm_encoder *encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif /* pick the first one */ +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) +#endif return encoder; return NULL; @@ -1111,8 +1152,15 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) /* find analog encoder */ if (amdgpu_connector->dac_load_detect) { struct drm_encoder *encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif if (encoder->encoder_type != DRM_MODE_ENCODER_DAC && encoder->encoder_type != DRM_MODE_ENCODER_TVDAC) continue; @@ -1163,8 +1211,15 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct drm_encoder *encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif if (amdgpu_connector->use_digital == true) { if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS) return encoder; @@ -1179,7 +1234,11 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) /* then check use digitial */ /* pick the first one */ +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) +#endif return encoder; return NULL; @@ -1209,7 +1268,11 @@ 
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) || (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) { return MODE_OK; +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) } else if (connector->display_info.is_hdmi) { +#else + } else if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; @@ -1317,8 +1380,15 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif amdgpu_encoder = to_amdgpu_encoder(encoder); switch (amdgpu_encoder->encoder_id) { @@ -1337,9 +1407,16 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector) { struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; +#ifndef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + int i; +#endif bool found = false; +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS drm_connector_for_each_possible_encoder(connector, encoder) { +#else + drm_connector_for_each_possible_encoder(connector, encoder, i) { +#endif amdgpu_encoder = to_amdgpu_encoder(encoder); if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2) found = true; @@ -1502,7 +1579,11 @@ static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) { return amdgpu_atombios_dp_mode_valid_helper(connector, mode); } else { +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif /* HDMI 1.3+ supports max clock of 340 Mhz */ if (mode->clock > 340000) return MODE_CLOCK_HIGH; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 78b3c067fea7e..b7746fda17f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -40,6 +40,8 @@ #include "amdgpu_gmc.h" #include "amdgpu_gem.h" #include "amdgpu_ras.h" +#include "amdgpu_display.h" +#include "amdgpu_hmm.h" static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, struct amdgpu_device *adev, @@ -275,12 +277,15 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, case AMDGPU_CHUNK_ID_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_IN: case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: +#if defined(HAVE_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: +#endif +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: +#endif case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: break; - default: goto free_partial_kdata; } @@ -296,6 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; + p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -434,7 +440,6 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, } return 0; } - static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, 
uint32_t handle, u64 point, u64 flags) @@ -518,7 +523,9 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, drm_syncobj_find(p->filp, deps[i].handle); if (!p->post_deps[i].syncobj) return -EINVAL; +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) p->post_deps[i].chain = NULL; +#endif p->post_deps[i].point = 0; p->num_post_deps++; } @@ -548,14 +555,14 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, for (i = 0; i < num_deps; ++i) { struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; - +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) dep->chain = NULL; if (syncobj_deps[i].point) { dep->chain = dma_fence_chain_alloc(); if (!dep->chain) return -ENOMEM; } - +#endif dep->syncobj = drm_syncobj_find(p->filp, syncobj_deps[i].handle); if (!dep->syncobj) { @@ -592,7 +599,7 @@ static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p, static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) { unsigned int ce_preempt = 0, de_preempt = 0; - int i, r; + int i, r = 0; for (i = 0; i < p->nchunks; ++i) { struct amdgpu_cs_chunk *chunk; @@ -606,7 +613,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) return r; break; case AMDGPU_CHUNK_ID_DEPENDENCIES: +#if defined(HAVE_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: +#endif r = amdgpu_cs_p2_dependencies(p, chunk); if (r) return r; @@ -621,6 +630,7 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) if (r) return r; break; +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk); if (r) @@ -631,6 +641,7 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) if (r) return r; break; +#endif case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: r = amdgpu_cs_p2_shadow(p, chunk); if (r) @@ -639,6 +650,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) } } + for (i = 0; i < p->gang_size; ++i) { + r = amdgpu_sem_add_cs(p->ctx, p->entities[i], &p->sync); + if (r) + return r; + } return 0; } @@ -786,7 +802,7 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.base.resv + .resv = amdkcl_ttm_resvp(&bo->tbo), }; uint32_t domain; int r; @@ -843,6 +859,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct drm_gem_object *obj; unsigned long index; +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED + unsigned tries = 10; +#endif unsigned int i; int r; @@ -865,6 +884,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, mutex_lock(&p->bo_list->bo_list_mutex); + +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* Get userptr backing pages. 
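On kernels with timeline syncobj support, the hunks above pre-allocate a dma_fence_chain per post-dependency during parsing so that signalling cannot fail once the job is committed; the matching attachment happens later in amdgpu_cs_post_dependencies(). Condensed into a sketch, with struct and field names as they appear in these hunks:

/* Sketch: allocation may fail and happens early; attachment cannot fail
 * and happens after the CS fence exists. Binary syncobjs skip the chain.
 */
static int post_dep_prepare(struct amdgpu_cs_post_dep *dep, u64 point)
{
        dep->point = point;
        dep->chain = NULL;
        if (point) {
                dep->chain = dma_fence_chain_alloc();
                if (!dep->chain)
                        return -ENOMEM;
        }
        return 0;
}

static void post_dep_signal(struct amdgpu_cs_post_dep *dep,
                            struct dma_fence *fence)
{
        if (dep->chain) {
                drm_syncobj_add_point(dep->syncobj, dep->chain, fence,
                                      dep->point);
                dep->chain = NULL;      /* ownership moved to the syncobj */
        } else {
                drm_syncobj_replace_fence(dep->syncobj, fence);
        }
}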
If pages are updated after registered * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do * amdgpu_ttm_backend_bind() to flush and invalidate new pages @@ -924,7 +945,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; } } - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct mm_struct *usermm; @@ -950,6 +970,96 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, kvfree(e->user_pages); e->user_pages = NULL; } +#else + while (1) { + struct list_head need_pages; + drm_exec_until_all_locked(&p->exec) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto error_free_pages; + + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + /* One fence for TTM and one for each CS job */ + r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, + 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto error_free_pages; + + e->bo_va = amdgpu_vm_bo_find(vm, e->bo); + } + + if (p->uf_bo) { + r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base, + 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto error_free_pages; + } + } + INIT_LIST_HEAD(&need_pages); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = e->bo; + + if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, + &e->user_invalidated) && e->user_pages) { + + /* We acquired a page array, but somebody + * invalidated it. Free it and try again + */ + release_pages(e->user_pages, + bo->tbo.ttm->num_pages); + kvfree(e->user_pages); + e->user_pages = NULL; + } + + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && + !e->user_pages) { + list_del(&e->tv.head); + list_add(&e->tv.head, &need_pages); + + amdgpu_bo_unreserve(bo); + } + } + + if (list_empty(&need_pages)) + break; + + /* Unreserve everything again. */ + drm_exec_fini(&p->exec); + + /* We tried too many times, just abort */ + if (!--tries) { + r = -EDEADLK; + DRM_ERROR("deadlock in %s\n", __func__); + goto error_free_pages; + } + + /* Fill the page arrays for all userptrs. 
*/ + list_for_each_entry(e, &need_pages, tv.head) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page*), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + r = -ENOMEM; + DRM_ERROR("calloc failure in %s\n", __func__); + goto error_free_pages; + } + + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, NULL); + if (r) { + DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); + kvfree(e->user_pages); + e->user_pages = NULL; + goto error_free_pages; + } + } + } +#endif + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); @@ -986,7 +1096,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bo_list->oa_obj); return 0; + out_free_user_pages: +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->bo; @@ -997,6 +1109,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->user_pages = NULL; e->range = NULL; } +#else +error_free_pages: + + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + if (!e->user_pages) + continue; + + release_pages(e->user_pages, e->tv.bo->ttm->num_pages); + kvfree(e->user_pages); + } +#endif mutex_unlock(&p->bo_list->bo_list_mutex); return r; } @@ -1110,7 +1233,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) return -EINVAL; } } @@ -1215,7 +1338,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) drm_exec_for_each_locked_object(&p->exec, index, obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - struct dma_resv *resv = bo->tbo.base.resv; + struct dma_resv *resv = amdkcl_ttm_resvp(&bo->tbo); enum amdgpu_sync_mode sync_mode; sync_mode = amdgpu_bo_explicit_sync(bo) ? @@ -1260,6 +1383,7 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) int i; for (i = 0; i < p->num_post_deps; ++i) { +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) if (p->post_deps[i].chain && p->post_deps[i].point) { drm_syncobj_add_point(p->post_deps[i].syncobj, p->post_deps[i].chain, @@ -1269,6 +1393,10 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) drm_syncobj_replace_fence(p->post_deps[i].syncobj, p->fence); } +#else + drm_syncobj_replace_fence(p->post_deps[i].syncobj, + p->fence); +#endif } } @@ -1295,11 +1423,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, fence = &p->jobs[i]->base.s_fence->scheduled; dma_fence_get(fence); +#ifdef HAVE_STRUCT_XARRAY r = drm_sched_job_add_dependency(&leader->base, fence); if (r) { dma_fence_put(fence); return r; } +#endif } if (p->gang_size > 1) { @@ -1307,6 +1437,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_job_set_gang_leader(p->jobs[i], leader); } +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is * added to BOs. 
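Taken together, the fallback path added above implements a pin-validate-retry protocol for userptr BOs. A comment-form summary, using only the helpers visible in this hunk:

/*
 * Legacy (non-HMM) userptr protocol, as implemented above:
 *
 * 1. Reserve the page directory and every BO on the list via drm_exec.
 * 2. For each userptr BO, ask amdgpu_ttm_tt_userptr_invalidated()
 *    whether its pinned page array went stale; stale arrays are
 *    dropped with release_pages() and the BO moves to need_pages.
 * 3. If need_pages is empty, keep the reservation and continue the CS.
 *    Otherwise unreserve everything, refill each missing array with
 *    kvmalloc_array() + amdgpu_ttm_tt_get_user_pages(), and retry,
 *    giving up with -EDEADLK after ten attempts.
 */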
@@ -1327,6 +1458,19 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, mutex_unlock(&p->adev->notifier_lock); return r; } +#else + /* No memory allocation is allowed while holding the mn lock */ + amdgpu_mn_lock(p->mn); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { + r = -ERESTARTSYS; + amdgpu_mn_unlock(p->mn); + return r; + } + } +#endif p->fence = dma_fence_get(&leader->base.s_fence->finished); drm_exec_for_each_locked_object(&p->exec, index, gobj) { @@ -1338,13 +1482,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (p->jobs[i] == leader) continue; - dma_resv_add_fence(gobj->resv, + dma_resv_add_fence(amdkcl_gem_resvp(gobj), &p->jobs[i]->base.s_fence->finished, DMA_RESV_USAGE_READ); } /* The gang leader as remembered as writer */ - dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(amdkcl_gem_resvp(gobj), p->fence, DMA_RESV_USAGE_WRITE); } seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], @@ -1370,7 +1514,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED mutex_unlock(&p->adev->notifier_lock); +#else + amdgpu_mn_unlock(p->mn); +#endif mutex_unlock(&p->bo_list->bo_list_mutex); return 0; } @@ -1385,7 +1533,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); +#if defined(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL) kfree(parser->post_deps[i].chain); +#endif } kfree(parser->post_deps); @@ -1797,7 +1947,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, *map = mapping; /* Double check that the BO is reserved by this CS */ - if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) + if (dma_resv_locking_ctx(amdkcl_ttm_resvp(&(*bo)->tbo)) != &parser->exec.ticket) return -EINVAL; (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b64..7d906ddbb30ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -28,9 +28,13 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev); - - addr = amdgpu_gmc_sign_extend(addr); + uint64_t addr; + if (adev->asic_type >= CHIP_NAVI10) { + addr = AMDGPU_VA_RESERVED_CSA_SIZE - AMDGPU_CSA_SIZE; + } else { + addr = AMDGPU_VA_RESERVED_CSA_START(adev); + addr = amdgpu_gmc_sign_extend(addr); + } return addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c43d1b6e5d66b..178aa7c18dc2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -216,6 +216,8 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, GFP_KERNEL); if (!entity) return -ENOMEM; + INIT_LIST_HEAD(&entity->sem_dep_list); + mutex_init(&entity->sem_lock); ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 
ctx->init_priority : ctx->override_priority; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 85376baaa92f2..18b70d9239882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -39,6 +39,8 @@ struct amdgpu_ctx_entity { uint32_t hw_ip; uint64_t sequence; struct drm_sched_entity entity; + struct list_head sem_dep_list; + struct mutex sem_lock; struct dma_fence *fences[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index cbef720de7797..da52911293612 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -42,6 +42,25 @@ #if defined(CONFIG_DEBUG_FS) +#if defined(AMDKCL_AMDGPU_DEBUGFS_CLEANUP) +void amdgpu_debugfs_cleanup(struct drm_minor *minor) +{ + struct drm_info_node *node, *tmp; + + if (!&minor->debugfs_root) + return; + + mutex_lock(&minor->debugfs_lock); + list_for_each_entry_safe(node, tmp, + &minor->debugfs_list, list) { + debugfs_remove(node->dent); + list_del(&node->list); + kfree(node); + } + mutex_unlock(&minor->debugfs_lock); +} +#endif + /** * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes * @@ -1670,9 +1689,13 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) } /* Avoid accidently unparking the sched thread during GPU reset */ +#ifndef HAVE_DOWN_WRITE_KILLABLE + down_write(&adev->reset_domain->sem); +#else r = down_write_killable(&adev->reset_domain->sem); if (r) return r; +#endif /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -1804,12 +1827,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_test_ib); DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_vm_info); +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram, NULL, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt, NULL, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark, "%lld\n"); +#endif static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, struct dma_fence **fences) @@ -1935,9 +1960,13 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) return -ENOMEM; /* Avoid accidently unparking the sched thread during GPU reset */ +#ifdef HAVE_DOWN_READ_KILLABLE r = down_read_killable(&adev->reset_domain->sem); if (r) goto pro_end; +#else + down_read(&adev->reset_domain->sem); +#endif /* stop the scheduler */ drm_sched_wqueue_stop(&ring->sched); @@ -2020,11 +2049,13 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) return ret; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, amdgpu_debugfs_ib_preempt, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); +#endif int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2035,6 +2066,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (!debugfs_initialized()) return 0; +#ifdef DEFINE_DEBUGFS_ATTRIBUTE debugfs_create_x32("amdgpu_smu_debug", 0600, root, &adev->pm.smu_debug_mask); @@ -2051,6 +2083,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n"); return PTR_ERR(ent); } +#endif /* Register debugfs entries for amdgpu_ttm */ amdgpu_ttm_debugfs_init(adev); @@ -2095,21 +2128,25 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (amdgpu_umsch_mm & 
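Several hunks above open-code fallbacks for down_read_killable()/down_write_killable(). A shim would keep those call sites branch-free; a minimal sketch assuming the HAVE_ macros used in this file and a hypothetical kcl_ prefix (the read-side variant is analogous):

#include <linux/rwsem.h>

#ifdef HAVE_DOWN_WRITE_KILLABLE
#define kcl_down_write_killable(sem) down_write_killable(sem)
#else
/* Older kernels: emulate "killable" by succeeding uninterruptibly. */
static inline int kcl_down_write_killable(struct rw_semaphore *sem)
{
        down_write(sem);
        return 0;
}
#endif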
amdgpu_umsch_mm_fwlog) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); + amdgpu_debugfs_jpeg_sched_mask_init(adev); + amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); +#ifdef DEFINE_DEBUGFS_ATTRIBUTE debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, &amdgpu_evict_vram_fops); debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, &amdgpu_evict_gtt_fops); + debugfs_create_file("amdgpu_benchmark", 0200, root, adev, + &amdgpu_benchmark_fops); +#endif debugfs_create_file("amdgpu_test_ib", 0444, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); - debugfs_create_file("amdgpu_benchmark", 0200, root, adev, - &amdgpu_benchmark_fops); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 0425432d8659b..f6d0ac99a42d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -25,7 +25,11 @@ /* * Debugfs */ - +#if defined(CONFIG_DEBUG_FS) +#if defined(AMDKCL_AMDGPU_DEBUGFS_CLEANUP) +void amdgpu_debugfs_cleanup(struct drm_minor *minor); +#endif +#endif int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev); void amdgpu_debugfs_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index cf2b4dd4d865a..946c48829f197 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -28,8 +28,8 @@ #include "atom.h" #ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job) { } #else @@ -203,6 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; + struct amdgpu_ip_block *ip_block; int ver; iter.data = buffer; @@ -282,13 +283,10 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, /* dump the ip state for each ip */ drm_printf(&p, "IP Dump\n"); for (int i = 0; i < coredump->adev->num_ip_blocks; i++) { - if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) { - drm_printf(&p, "IP: %s\n", - coredump->adev->ip_blocks[i] - .version->funcs->name); - coredump->adev->ip_blocks[i] - .version->funcs->print_ip_state( - (void *)coredump->adev, &p); + ip_block = &coredump->adev->ip_blocks[i]; + if (ip_block->version->funcs->print_ip_state) { + drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name); + ip_block->version->funcs->print_ip_state(ip_block, &p); drm_printf(&p, "\n"); } } @@ -315,7 +313,9 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, } } - if (coredump->reset_vram_lost) + if (coredump->skip_vram_check) + drm_printf(&p, "VRAM lost check is skipped!\n"); + else if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); return count - iter.remain; @@ -326,12 +326,11 @@ static void amdgpu_devcoredump_free(void *data) kfree(data); } -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct 
amdgpu_reset_context *reset_context) +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job) { - struct amdgpu_coredump_info *coredump; struct drm_device *dev = adev_to_drm(adev); - struct amdgpu_job *job = reset_context->job; + struct amdgpu_coredump_info *coredump; struct drm_sched_job *s_job; coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); @@ -341,11 +340,12 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, return; } + coredump->skip_vram_check = skip_vram_check; coredump->reset_vram_lost = vram_lost; - if (reset_context->job && reset_context->job->vm) { + if (job && job->vm) { + struct amdgpu_vm *vm = job->vm; struct amdgpu_task_info *ti; - struct amdgpu_vm *vm = reset_context->job->vm; ti = amdgpu_vm_get_task_info_vm(vm); if (ti) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h index 52459512cb2b1..ef9772c6bcc9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -26,7 +26,6 @@ #define __AMDGPU_DEV_COREDUMP_H__ #include "amdgpu.h" -#include "amdgpu_reset.h" #ifdef CONFIG_DEV_COREDUMP @@ -36,12 +35,12 @@ struct amdgpu_coredump_info { struct amdgpu_device *adev; struct amdgpu_task_info reset_task_info; struct timespec64 reset_time; + bool skip_vram_check; bool reset_vram_lost; struct amdgpu_ring *ring; }; #endif -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); - +void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, + bool vram_lost, struct amdgpu_job *job); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a6b8d0ba4758c..d1bb9e85b6d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -29,10 +29,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -144,6 +146,51 @@ const char *amdgpu_asic_name[] = { "LAST", }; +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0) +/* + * Default init level where all blocks are expected to be initialized. This is + * the level of initialization expected by default and also after a full reset + * of the device. + */ +struct amdgpu_init_level amdgpu_init_default = { + .level = AMDGPU_INIT_LEVEL_DEFAULT, + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, +}; + +/* + * Minimal blocks needed to be initialized before a XGMI hive can be reset. This + * is used for cases like reset on initialization where the entire hive needs to + * be reset before first use. 
+ */ +struct amdgpu_init_level amdgpu_init_minimal_xgmi = { + .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI, + .hwini_ip_block_mask = + BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) | + BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) | + BIT(AMD_IP_BLOCK_TYPE_PSP) +}; + +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, + enum amd_ip_block_type block) +{ + return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0; +} + +void amdgpu_set_init_level(struct amdgpu_device *adev, + enum amdgpu_init_lvl_id lvl) +{ + switch (lvl) { + case AMDGPU_INIT_LEVEL_MINIMAL_XGMI: + adev->init_lvl = &amdgpu_init_minimal_xgmi; + break; + case AMDGPU_INIT_LEVEL_DEFAULT: + fallthrough; + default: + adev->init_lvl = &amdgpu_init_default; + break; + } +} + static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); /** @@ -242,7 +289,7 @@ void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) * - "unknown" - Not known * */ - +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS static ssize_t amdgpu_device_get_board_info(struct device *dev, struct device_attribute *attr, char *buf) @@ -294,6 +341,7 @@ static const struct attribute_group amdgpu_board_attrs_group = { .attrs = amdgpu_board_attrs, .is_visible = amdgpu_board_attrs_is_visible }; +#endif static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); @@ -1517,6 +1565,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) spin_unlock_irqrestore(&adev->wb.lock, flags); } +#ifdef AMDKCL_ENABLE_RESIZE_FB_BAR /** * amdgpu_device_resize_fb_bar - try to resize FB BAR * @@ -1601,6 +1650,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) return 0; } +#endif static bool amdgpu_device_read_bios(struct amdgpu_device *adev) { @@ -1655,7 +1705,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) } /* Don't post if we need to reset whole hive on init */ - if (adev->gmc.xgmi.pending_reset) + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) return false; if (adev->has_hw_reset) { @@ -1756,7 +1806,11 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) return false; if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) return false; +#ifdef HAVE_PCIE_ASPM_ENABLED return pcie_aspm_enabled(adev->pdev); +#else + return false; +#endif } /* if we get transitioned to only one device, take VGA back */ @@ -1769,11 +1823,18 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) * Enable/disable vga decode (all asics). * Returns VGA resource flags. 
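An init level is thus just an identifier plus the bit mask that amdgpu_ip_member_of_hwini() consults before any hardware init. Purely as an illustration (the driver defines no such level), a bring-up level covering only what a display needs would compose its mask the same way:

/* Hypothetical example: a level is defined entirely by its BIT() mask. */
struct amdgpu_init_level amdgpu_init_display_bringup_example = {
        .level = AMDGPU_INIT_LEVEL_DEFAULT,     /* would need its own id */
        .hwini_ip_block_mask =
                BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_GMC) |
                BIT(AMD_IP_BLOCK_TYPE_IH) | BIT(AMD_IP_BLOCK_TYPE_DCE),
};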
*/ +#ifdef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, bool state) +#else +static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) +#endif { +#ifdef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); - +#else + struct amdgpu_device *adev = cookie; +#endif amdgpu_asic_set_vga_state(adev, state); if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1916,6 +1977,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1969,6 +2032,10 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + amdgpu_direct_gma_size = min(amdgpu_direct_gma_size, 96); + + for (i = 0; i < MAX_XCP; i++) + adev->enforce_isolation[i] = !!enforce_isolation; return 0; } @@ -2035,7 +2102,11 @@ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. */ +#ifdef HAVE_DRM_DEVICE_OPEN_COUNT_INT + return dev->open_count == 0; +#else return atomic_read(&dev->open_count) == 0; +#endif } static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { @@ -2154,7 +2225,7 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, if (!adev->ip_blocks[i].status.valid) continue; if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev); + r = adev->ip_blocks[i].version->funcs->wait_for_idle(&adev->ip_blocks[i]); if (r) return r; break; @@ -2165,26 +2236,24 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, } /** - * amdgpu_device_ip_is_idle - is the hardware IP idle + * amdgpu_device_ip_is_valid - is the hardware IP enabled * * @adev: amdgpu_device pointer * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) * - * Check if the hardware IP is idle or not. - * Returns true if it the IP is idle, false if not. + * Check if the hardware IP is enable or not. + * Returns true if it the IP is enable, false if not. 
*/ -bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; if (adev->ip_blocks[i].version->type == block_type) - return adev->ip_blocks[i].version->funcs->is_idle((void *)adev); + return adev->ip_blocks[i].status.valid; } - return true; + return false; } @@ -2266,6 +2335,8 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, ip_block_version->funcs->name); + adev->ip_blocks[adev->num_ip_blocks].adev = adev; + adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; return 0; @@ -2561,25 +2632,25 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) total = true; for (i = 0; i < adev->num_ip_blocks; i++) { + ip_block = &adev->ip_blocks[i]; + if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_WARN("disabled ip block: %d <%s>\n", i, adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; - } else { - if (adev->ip_blocks[i].version->funcs->early_init) { - r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); - if (r == -ENOENT) { - adev->ip_blocks[i].status.valid = false; - } else if (r) { - DRM_ERROR("early_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - total = false; - } else { - adev->ip_blocks[i].status.valid = true; - } + } else if (ip_block->version->funcs->early_init) { + r = ip_block->version->funcs->early_init(ip_block); + if (r == -ENOENT) { + adev->ip_blocks[i].status.valid = false; + } else if (r) { + DRM_ERROR("early_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + total = false; } else { adev->ip_blocks[i].status.valid = true; } + } else { + adev->ip_blocks[i].status.valid = true; } /* get the vbios after the asic_funcs are set up */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { @@ -2628,10 +2699,13 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hw) continue; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2653,7 +2727,10 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hw) continue; - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2676,6 +2753,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) continue; + if (!amdgpu_ip_member_of_hwini(adev, + AMD_IP_BLOCK_TYPE_PSP)) + break; + if (!adev->ip_blocks[i].status.sw) continue; @@ -2684,14 +2765,14 @@ 
static int amdgpu_device_fw_loading(struct amdgpu_device *adev) break; if (amdgpu_in_reset(adev) || adev->in_suspend) { - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } } else { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2781,6 +2862,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) */ static int amdgpu_device_ip_init(struct amdgpu_device *adev) { + bool init_badpage; int i, r; r = amdgpu_ras_init(adev); @@ -2790,7 +2872,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2798,9 +2880,13 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { /* need to do common hw init early so everything is set up for gmc */ - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init %d failed %d\n", i, r); goto init_failed; @@ -2817,7 +2903,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); goto init_failed; } - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("hw_init %d failed %d\n", i, r); goto init_failed; @@ -2890,7 +2976,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) * Note: theoretically, this should be called before all vram allocations * to protect retired page from abusing */ - r = amdgpu_ras_recovery_init(adev); + init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI); + r = amdgpu_ras_recovery_init(adev, init_badpage); if (r) goto init_failed; @@ -2930,7 +3017,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, true); /* Don't init kfd if whole hive need to be reset during init */ - if (!adev->gmc.xgmi.pending_reset) { + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); } @@ -3130,7 +3217,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->funcs->late_init) { - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); + r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]); if (r) { DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3219,7 +3306,7 @@ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { - r = 
adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_fini(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", @@ -3239,7 +3326,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) if (!adev->ip_blocks[i].version->funcs->early_fini) continue; - r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); + r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]); if (r) { DRM_DEBUG("early_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3258,7 +3345,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); + r = adev->ip_blocks[i].version->funcs->hw_fini(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", @@ -3312,7 +3399,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_seq64_fini(adev); } - r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); + r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", @@ -3326,7 +3413,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.late_initialized) continue; if (adev->ip_blocks[i].version->funcs->late_fini) - adev->ip_blocks[i].version->funcs->late_fini((void *)adev); + adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); adev->ip_blocks[i].status.late_initialized = false; } @@ -3398,7 +3485,7 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) continue; /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); + r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { DRM_ERROR("suspend of IP block <%s> failed %d\n", @@ -3444,14 +3531,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) } /* skip unnecessary suspend if we do not initialize them yet */ - if (adev->gmc.xgmi.pending_reset && - !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { - adev->ip_blocks[i].status.hw = false; + if (!amdgpu_ip_member_of_hwini( + adev, adev->ip_blocks[i].version->type)) continue; - } /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just @@ -3485,7 +3567,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) continue; /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); + r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { DRM_ERROR("suspend of IP block <%s> failed %d\n", @@ -3565,7 +3647,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) !block->status.valid) continue; - r = block->version->funcs->hw_init(adev); + r = block->version->funcs->hw_init(&adev->ip_blocks[i]); DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; @@ -3605,9 +3687,9 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) continue; if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) - r = 
block->version->funcs->resume(adev); + r = block->version->funcs->resume(&adev->ip_blocks[i]); else - r = block->version->funcs->hw_init(adev); + r = block->version->funcs->hw_init(&adev->ip_blocks[i]); DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) @@ -3643,7 +3725,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3661,7 +3743,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * First resume function for hardware IPs. The list of all the hardware + * Second resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * all blocks except COMMON, GMC, and IH. resume puts the hardware into a * functional state after a suspend and updates the software state as @@ -3679,9 +3761,10 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -3693,6 +3776,36 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Third resume function for hardware IPs. The list of all the hardware + * IPs that make up the asic is walked and the resume callbacks are run for + * all DCE. resume puts the hardware into a functional state after a suspend + * and updates the software state as necessary. This function is also used + * for restoring the GPU after a GPU reset. + * + * Returns 0 on success, negative error code on failure. 
+ */ +static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); + if (r) + return r; + } + } + + return 0; +} + /** * amdgpu_device_ip_resume - run resume for hardware IPs * @@ -3722,6 +3835,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(adev, true); + if (r) + return r; + + amdgpu_fence_driver_hw_init(adev); + + r = amdgpu_device_ip_resume_phase3(adev); + return r; } @@ -4056,9 +4176,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.rlcg_reg_lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED mutex_init(&adev->notifier_lock); +#endif mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); + mutex_init(&adev->gfx.kfd_sch_mutex); amdgpu_device_init_apu_flags(adev); @@ -4077,9 +4203,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->mm_stats.lock); spin_lock_init(&adev->wb.lock); - INIT_LIST_HEAD(&adev->shadow_list); - mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->reset_list); INIT_LIST_HEAD(&adev->ras_list); @@ -4090,6 +4213,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); @@ -4107,7 +4245,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, * for throttling interrupt) = 60 seconds. */ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); + ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1); + +#ifdef RATELIMIT_MSG_ON_RELEASE ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); + ratelimit_set_flags(&adev->virt.ras_telemetry_rs, RATELIMIT_MSG_ON_RELEASE); +#endif /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -4151,13 +4294,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_set_mcbp(adev); + /* + * By default, use default mode where all blocks are expected to be + * initialized. At present a 'swinit' of blocks is required to be + * completed before the need for a different level is detected. 
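A skeleton of the contract the enforce-isolation comment above describes: amdgpu_fence_count_emitted() and the struct layout come from this series, amdgpu_amdkfd_start_sched() is assumed from the KFD side, and the real handler additionally restricts itself to the GFX/compute rings of its own partition.

/* Sketch only: poll emitted fences, then either re-arm or resume KFD. */
static void enforce_isolation_sketch(struct work_struct *work)
{
        struct amdgpu_isolation_work *iso =
                container_of(to_delayed_work(work),
                             struct amdgpu_isolation_work, work);
        struct amdgpu_device *adev = iso->adev;
        bool busy = false;
        int i;

        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (ring && amdgpu_fence_count_emitted(ring)) {
                        busy = true;
                        break;
                }
        }

        if (busy)       /* fences outstanding: look again shortly */
                schedule_delayed_work(&iso->work, 1);
        else            /* quiesced: let the KFD runqueue resume */
                amdgpu_amdkfd_start_sched(adev, iso->xcp_id);
}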
+ */ + amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT); /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; /* Get rid of things like offb */ +#ifdef HAVE_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_DRM_DRIVER_ARG r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); +#else + r = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); +#endif if (r) return r; @@ -4223,20 +4376,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { if (adev->gmc.xgmi.num_physical_nodes) { dev_info(adev->dev, "Pending hive reset.\n"); - adev->gmc.xgmi.pending_reset = true; - /* Only need to init necessary block for SMU to handle the reset */ - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { - DRM_DEBUG("IP %s disabled for hw_init.\n", - adev->ip_blocks[i].version->funcs->name); - adev->ip_blocks[i].status.hw = true; - } - } + amdgpu_set_init_level(adev, + AMDGPU_INIT_LEVEL_MINIMAL_XGMI); } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && !amdgpu_device_has_display_hardware(adev)) { r = psp_gpu_reset(adev); @@ -4306,6 +4447,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* init the mode config */ drm_mode_config_init(adev_to_drm(adev)); + if (amdgpu_sriov_vf(adev)) { + adev->timeout_wq = alloc_ordered_workqueue("amdgpu_ring_timeout_wq", 0); + if (!adev->timeout_wq) + dev_warn(adev->dev, "alloc_ordered_workqueue failed\n"); + } + r = amdgpu_device_ip_init(adev); if (r) { dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); @@ -4344,7 +4491,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ - if (!adev->gmc.xgmi.pending_reset) { + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { r = amdgpu_device_ip_late_init(adev); if (r) { dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); @@ -4367,10 +4514,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, * operations performed in `late_init` might affect the sysfs * interfaces creating. 
*/ +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS r = amdgpu_atombios_sysfs_init(adev); if (r) drm_err(&adev->ddev, "registering atombios sysfs failed (%d).\n", r); +#endif r = amdgpu_pm_sysfs_init(adev); if (r) @@ -4386,11 +4535,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); - +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group); if (r) dev_err(adev->dev, "Could not create amdgpu board attributes\n"); +#endif amdgpu_fru_sysfs_init(adev); amdgpu_reg_state_sysfs_init(adev); @@ -4409,7 +4559,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, * ignore it */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) +#ifdef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); +#else + vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); +#endif px = amdgpu_device_supports_px(ddev); @@ -4421,9 +4575,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (px) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); - if (adev->gmc.xgmi.pending_reset) - queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) + amdgpu_xgmi_reset_on_init(adev); amdgpu_device_check_iommu_direct_map(adev); @@ -4486,6 +4639,9 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); + + if (adev->mman.initialized) + drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; /* make sure IB test finished before entering exclusive mode @@ -4506,9 +4662,6 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) } amdgpu_fence_driver_hw_fini(adev); - if (adev->mman.initialized) - drain_workqueue(adev->mman.bdev.wq); - if (adev->pm.sysfs_initialized) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) @@ -4573,7 +4726,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) vga_switcheroo_fini_domain_pm_ops(adev->dev); if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) +#ifdef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE vga_client_unregister(adev->pdev); +#else + vga_client_register(adev->pdev, NULL, NULL, NULL); +#endif if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -4652,7 +4809,7 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; if (!adev->ip_blocks[i].version->funcs->prepare_suspend) continue; - r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) goto unprepare; } @@ -4767,7 +4924,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); goto exit; } - amdgpu_fence_driver_hw_init(adev); if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); @@ -4856,7 +5012,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->funcs->check_soft_reset) adev->ip_blocks[i].status.hang = - adev->ip_blocks[i].version->funcs->check_soft_reset(adev); + adev->ip_blocks[i].version->funcs->check_soft_reset( + &adev->ip_blocks[i]); if (adev->ip_blocks[i].status.hang) { dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); asic_hang = true; @@ -4885,7 +5042,7 @@ static 
int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->pre_soft_reset) { - r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4947,7 +5104,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->soft_reset) { - r = adev->ip_blocks[i].version->funcs->soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4976,7 +5133,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].status.hang && adev->ip_blocks[i].version->funcs->post_soft_reset) - r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); + r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]); if (r) return r; } @@ -4984,80 +5141,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_recover_vram - Recover some VRAM contents - * - * @adev: amdgpu_device pointer - * - * Restores the contents of VRAM buffers from the shadows in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * - * Returns: - * 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram(struct amdgpu_device *adev) -{ - struct dma_fence *fence = NULL, *next = NULL; - struct amdgpu_bo *shadow; - struct amdgpu_bo_vm *vmbo; - long r = 1, tmo; - - if (amdgpu_sriov_runtime(adev)) - tmo = msecs_to_jiffies(8000); - else - tmo = msecs_to_jiffies(100); - - dev_info(adev->dev, "recover vram bo from shadow start\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { - /* If vm is compute context or adev is APU, shadow will be NULL */ - if (!vmbo->shadow) - continue; - shadow = vmbo->shadow; - - /* No need to recover an evicted BO */ - if (!shadow->tbo.resource || - shadow->tbo.resource->mem_type != TTM_PL_TT || - shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || - shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) - continue; - - r = amdgpu_bo_restore_shadow(shadow, &next); - if (r) - break; - - if (fence) { - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - fence = next; - if (tmo == 0) { - r = -ETIMEDOUT; - break; - } else if (tmo < 0) { - r = tmo; - break; - } - } else { - fence = next; - } - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) - tmo = dma_fence_wait_timeout(fence, false, tmo); - dma_fence_put(fence); - - if (r < 0 || tmo <= 0) { - dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); - return -EIO; - } - - dev_info(adev->dev, "recover vram bo from shadow done\n"); - return 0; -} - - /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * @@ -5085,7 +5168,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_ras_set_fed(adev, false); + amdgpu_ras_clear_err_state(adev); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -5120,12 +5203,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) 
amdgpu_inc_vram_lost(adev); - r = amdgpu_device_recover_vram(adev); - } - if (r) - return r; /* need to be called during full access so we can't do it later like * bare-metal does. @@ -5139,20 +5218,25 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); + + amdgpu_virt_ras_telemetry_post_reset(adev); + return 0; } /** - * amdgpu_device_has_job_running - check if there is any job in mirror list + * amdgpu_device_has_job_running - check if there is any unfinished job * * @adev: amdgpu_device pointer * - * check if there is any job in mirror list + * check if there is any job running on the device when guest driver receives + * FLR notification from host driver. If there are still jobs running, then + * the guest driver will not respond the FLR reset. Instead, let the job hit + * the timeout and guest driver then issue the reset request. */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { int i; - struct drm_sched_job *job; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -5160,11 +5244,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) if (!amdgpu_ring_sched_ready(ring)) continue; - spin_lock(&ring->sched.job_list_lock); - job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - spin_unlock(&ring->sched.job_list_lock); - if (job) + if (amdgpu_fence_count_emitted(ring)) return true; } return false; @@ -5345,7 +5425,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < tmp_adev->num_ip_blocks; i++) if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) tmp_adev->ip_blocks[i].version->funcs - ->dump_ip_state((void *)tmp_adev); + ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]); dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); } @@ -5361,76 +5441,27 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -int amdgpu_do_asic_reset(struct list_head *device_list_handle, - struct amdgpu_reset_context *reset_context) +int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) { - struct amdgpu_device *tmp_adev = NULL; - bool need_full_reset, skip_hw_reset, vram_lost = false; - int r = 0; - - /* Try reset handler method first */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); - - reset_context->reset_device_list = device_list_handle; - r = amdgpu_reset_perform_reset(tmp_adev, reset_context); - /* If reset handler not implemented, continue; otherwise return */ - if (r == -EOPNOTSUPP) - r = 0; - else - return r; - - /* Reset handler not implemented, use the default method */ - need_full_reset = - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); - - /* - * ASIC reset has to be done on all XGMI hive nodes ASAP - * to allow proper links negotiation in FW (within 1 sec) - */ - if (!skip_hw_reset && need_full_reset) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* For XGMI run all resets in parallel to speed up the process */ - if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - tmp_adev->gmc.xgmi.pending_reset = false; - if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) - r = -EALREADY; - } else - r = amdgpu_asic_reset(tmp_adev); - - if (r) { - dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm 
dev, %s", - r, adev_to_drm(tmp_adev)->unique); - goto out; - } - } + struct list_head *device_list_handle; + bool full_reset, vram_lost = false; + struct amdgpu_device *tmp_adev; + int r; - /* For XGMI wait for all resets to complete before proceed */ - if (!r) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { - flush_work(&tmp_adev->xgmi_reset_work); - r = tmp_adev->asic_reset_res; - if (r) - break; - } - } - } - } + device_list_handle = reset_context->reset_device_list; - if (!r && amdgpu_ras_intr_triggered()) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); - } + if (!device_list_handle) + return -EINVAL; - amdgpu_ras_intr_cleared(); - } + full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + r = 0; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (need_full_reset) { + /* After reset, it's default init level */ + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); + if (full_reset) { /* post card */ - amdgpu_ras_set_fed(tmp_adev, false); + amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5444,7 +5475,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); @@ -5467,6 +5498,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (tmp_adev->mman.buffer_funcs_ring->sched.ready) amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + r = amdgpu_device_ip_resume_phase3(tmp_adev); + if (r) + goto out; + if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); @@ -5518,23 +5553,95 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, r = amdgpu_ib_ring_tests(tmp_adev); if (r) { dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); - need_full_reset = true; r = -EAGAIN; goto end; } } - if (!r) - r = amdgpu_device_recover_vram(tmp_adev); - else + if (r) tmp_adev->asic_reset_res = r; } end: - if (need_full_reset) + return r; +} + +int amdgpu_do_asic_reset(struct list_head *device_list_handle, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *tmp_adev = NULL; + bool need_full_reset, skip_hw_reset; + int r = 0; + + /* Try reset handler method first */ + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + + reset_context->reset_device_list = device_list_handle; + r = amdgpu_reset_perform_reset(tmp_adev, reset_context); + /* If reset handler not implemented, continue; otherwise return */ + if (r == -EOPNOTSUPP) + r = 0; + else + return r; + + /* Reset handler not implemented, use the default method */ + need_full_reset = + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); + + /* + * ASIC reset has to be done on all XGMI hive nodes ASAP + * to allow proper links negotiation in FW (within 1 sec) + */ + if (!skip_hw_reset && need_full_reset) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if 
(!queue_work(system_unbound_wq, + &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + dev_err(tmp_adev->dev, + "ASIC reset failed with error, %d for drm dev, %s", + r, adev_to_drm(tmp_adev)->unique); + goto out; + } + } + + /* For XGMI wait for all resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + reset_list) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } + } + } + + if (!r && amdgpu_ras_intr_triggered()) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + amdgpu_ras_reset_error_count(tmp_adev, + AMDGPU_RAS_BLOCK__MMHUB); + } + + amdgpu_ras_intr_cleared(); + } + + r = amdgpu_device_reinit_after_reset(reset_context); + if (r == -EAGAIN) set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); else clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + +out: return r; } @@ -5626,6 +5733,8 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + amdgpu_reset_domain_clear_pending(adev->reset_domain); + #if defined(CONFIG_DEBUG_FS) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); @@ -5684,6 +5793,20 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool audio_suspended = false; int retry_limit = AMDGPU_MAX_RETRY_LIMIT; + if (amdgpu_reset_domain_in_drain_mode(adev->reset_domain)) + return 0; + + /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. + */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } /* * Special case: RAS triggered and full reset isn't supported */ @@ -6122,26 +6245,31 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) * Return true if @peer_adev can access (DMA) @adev through the PCIe * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of * @peer_adev. + * + * @note: CONFIG_HSA_AMD_P2P indicates support for P2P DMA mappings. Query + * P2PDMA distance only if the kernel has all the prerequisites for P2P DMA + * support. Otherwise fall back to the less reliable legacy P2P support to + * avoid regressions. + * */ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { -#ifdef CONFIG_HSA_AMD_P2P + bool p2p_access = true; uint64_t address_mask = peer_adev->dev->dma_mask ? 
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size - 1; - bool p2p_access = +#ifdef CONFIG_HSA_AMD_P2P + p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); +#endif return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && adev->gmc.real_vram_size == adev->gmc.visible_vram_size && !(adev->gmc.aper_base & address_mask || aper_limit & address_mask)); -#else - return false; -#endif } int amdgpu_device_baco_enter(struct drm_device *dev) @@ -6376,6 +6504,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); int r; + if (amdgpu_sriov_vf(adev)) + return false; + r = pci_save_state(pdev); if (!r) { kfree(adev->pci_state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe6..0692eb154fb9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -232,6 +232,7 @@ static int hw_id_map[MAX_HWIP] = { [ISP_HWIP] = ISP_HWID, }; +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary) { u64 tmr_offset, tmr_size, pos; @@ -254,6 +255,7 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, return -ENOENT; } +#endif #define IP_DISCOVERY_V2 2 #define IP_DISCOVERY_V4 4 @@ -278,19 +280,23 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV if (vram_size) { +#endif uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->mman.discovery_tmr_size, false); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV } else { ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); } +#endif return ret; } @@ -742,7 +748,11 @@ struct ip_hw_instance { u8 harvest; int num_base_addresses; +#ifdef __counted_by u32 base_addr[] __counted_by(num_base_addresses); +#else + u32 base_addr[]; +#endif }; struct ip_hw_id { @@ -829,7 +839,9 @@ static struct ip_hw_instance_attr ip_hw_attr[] = { }; static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1]; +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE ATTRIBUTE_GROUPS(ip_hw_instance); +#endif #define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj) #define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr) @@ -861,7 +873,11 @@ static void ip_hw_instance_release(struct kobject *kobj) static const struct kobj_type ip_hw_instance_ktype = { .release = ip_hw_instance_release, .sysfs_ops = &ip_hw_instance_sysfs_ops, +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE .default_groups = ip_hw_instance_groups, +#else + .default_attrs = ip_hw_instance_attrs, +#endif }; /* -------------------------------------------------- */ @@ -910,7 +926,9 @@ static struct attribute *ip_die_entry_attrs[] = { &num_ips_attr.attr, NULL, }; +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */ +#endif #define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset) @@ -943,7 +961,11 @@ static const struct sysfs_ops ip_die_entry_sysfs_ops = { static const struct kobj_type ip_die_entry_ktype = { .release = 
ip_die_entry_release, .sysfs_ops = &ip_die_entry_sysfs_ops, +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE .default_groups = ip_die_entry_groups, +#else + .default_attrs = ip_die_entry_attrs, +#endif }; static const struct kobj_type die_kobj_ktype = { @@ -1500,6 +1522,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1581,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); @@ -1712,37 +1745,75 @@ union nps_info { struct nps_info_v1_0 v1; }; +static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev, + union nps_info *nps_data) +{ + uint64_t vram_size, pos, offset; + struct nps_info_header *nhdr; + struct binary_header bhdr; + uint16_t checksum; + + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; + pos = vram_size - DISCOVERY_TMR_OFFSET; + amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false); + + offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset); + checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum); + + amdgpu_device_vram_access(adev, (pos + offset), nps_data, + sizeof(*nps_data), false); + + nhdr = (struct nps_info_header *)(nps_data); + if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data, + le32_to_cpu(nhdr->size_bytes), + checksum)) { + dev_err(adev->dev, "nps data refresh, checksum mismatch\n"); + return -EINVAL; + } + + return 0; +} + int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, uint32_t *nps_type, struct amdgpu_gmc_memrange **ranges, - int *range_cnt) + int *range_cnt, bool refresh) { struct amdgpu_gmc_memrange *mem_ranges; struct binary_header *bhdr; union nps_info *nps_info; + union nps_info nps_data; u16 offset; - int i; + int i, r; if (!nps_type || !range_cnt || !ranges) return -EINVAL; - if (!adev->mman.discovery_bin) { - dev_err(adev->dev, - "fetch mem range failed, ip discovery uninitialized\n"); - return -EINVAL; - } + if (refresh) { + r = amdgpu_discovery_refresh_nps_info(adev, &nps_data); + if (r) + return r; + nps_info = &nps_data; + } else { + if (!adev->mman.discovery_bin) { + dev_err(adev->dev, + "fetch mem range failed, ip discovery uninitialized\n"); + return -EINVAL; + } - bhdr = (struct binary_header *)adev->mman.discovery_bin; - offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset); + bhdr = (struct binary_header 
*)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset); - if (!offset) - return -ENOENT; + if (!offset) + return -ENOENT; - /* If verification fails, return as if NPS table doesn't exist */ - if (amdgpu_discovery_verify_npsinfo(adev, bhdr)) - return -ENOENT; + /* If verification fails, return as if NPS table doesn't exist */ + if (amdgpu_discovery_verify_npsinfo(adev, bhdr)) + return -ENOENT; - nps_info = (union nps_info *)(adev->mman.discovery_bin + offset); + nps_info = + (union nps_info *)(adev->mman.discovery_bin + offset); + } switch (le16_to_cpu(nps_info->v1.header.version_major)) { case 1: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index f5d36525ec3ef..b44d56465c5b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -33,6 +33,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, uint32_t *nps_type, struct amdgpu_gmc_memrange **ranges, - int *range_cnt); + int *range_cnt, bool refresh); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 092ec11258cdd..6604fdc6c2e8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -76,6 +76,7 @@ void amdgpu_display_hotplug_work_func(struct work_struct *work) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) amdgpu_connector_hotplug(connector); + drm_connector_list_iter_end(&iter); mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ @@ -216,13 +217,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - obj = crtc->primary->fb->obj[0]; + obj = drm_gem_fb_get_obj(crtc->primary->fb, 0); /* take a reference to the old object */ work->old_abo = gem_to_amdgpu_bo(obj); amdgpu_bo_ref(work->old_abo); - obj = fb->obj[0]; + obj = drm_gem_fb_get_obj(fb, 0); new_abo = gem_to_amdgpu_bo(obj); /* pin the new buffer */ @@ -233,6 +234,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, } if (!adev->enable_virtual_display) { + new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { @@ -247,7 +249,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + r = dma_resv_get_fences(amdkcl_ttm_resvp(&new_abo->tbo), DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { @@ -414,8 +416,8 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) uint32_t devices; int i = 0; - drm_connector_list_iter_begin(dev, &iter); DRM_INFO("AMDGPU Display Connectors\n"); + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { amdgpu_connector = to_amdgpu_connector(connector); DRM_INFO("Connector %d:\n", i); @@ -578,6 +580,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, return domain; } +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED static const struct drm_format_info dcc_formats[] = { { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 0, }, .block_w = {1, 1, 1}, 
.block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, @@ -670,7 +673,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier) /* returning NULL will cause the default format structs to be used. */ return NULL; } - +#endif /* * Tries to extract the renderable DCC offset from the opaque metadata attached @@ -746,6 +749,7 @@ static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb return 0; } +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); @@ -939,6 +943,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) afb->base.flags |= DRM_MODE_FB_MODIFIERS; return 0; } +#endif /* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb) @@ -961,6 +966,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb) } } +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED static void get_block_dimensions(unsigned int block_log2, unsigned int cpp, unsigned int *width, unsigned int *height) { @@ -1043,17 +1049,16 @@ static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane (uint64_t)rfb->base.pitches[plane] / block_pitch * block_size * DIV_ROUND_UP(height, block_height); - if (rfb->base.obj[0]->size < size) { + if (drm_gem_fb_get_obj(&rfb->base, 0)->size < size) { drm_dbg_kms(rfb->base.dev, "BO size 0x%zx is less than 0x%llx required for plane %d\n", - rfb->base.obj[0]->size, size, plane); + drm_gem_fb_get_obj(&rfb->base, 0)->size, size, plane); return -EINVAL; } return 0; } - static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) { const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format); @@ -1061,7 +1066,11 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) int ret; unsigned int i, block_width, block_height, block_size_log2; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED if (rfb->base.dev->mode_config.fb_modifiers_not_supported) +#else + if (!rfb->base.dev->mode_config.allow_fb_modifiers) +#endif return 0; for (i = 0; i < format_info->num_planes; ++i) { @@ -1157,6 +1166,7 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) return 0; } +#endif static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, uint64_t *tiling_flags, bool *tmz_surface, @@ -1172,7 +1182,7 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return 0; } - rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); + rbo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(&((struct amdgpu_framebuffer *)amdgpu_fb)->base, 0)); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { @@ -1191,6 +1201,30 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } +int amdgpu_display_gem_fb_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) +{ + int ret; + rfb->base.obj[0] = obj; + drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); + + ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); + if (ret) + goto err; + + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) + goto err; + + return 0; +err: + drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); + rfb->base.obj[0] = NULL; + return ret; +} + static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, 
struct amdgpu_framebuffer *rfb, struct drm_file *file_priv, @@ -1201,6 +1235,8 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); + +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED /* Verify that the modifier is supported. */ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -1211,6 +1247,7 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, ret = -EINVAL; goto err; } +#endif ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); if (ret) @@ -1244,6 +1281,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, * This needs to happen before modifier conversion as that might change * the number of planes. */ +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED for (i = 1; i < rfb->base.format->num_planes; ++i) { if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. %u\n", @@ -1252,13 +1290,19 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, return ret; } } +#endif ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface, &rfb->gfx12_dcc); if (ret) return ret; +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) { +#else + if (!dev->mode_config.allow_fb_modifiers && !adev->enable_virtual_display) { +#endif drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI, "GFX9+ requires FB check based on format modifier\n"); ret = check_tiling_flags_gfx6(rfb); @@ -1266,7 +1310,11 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, return ret; } +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED if (!dev->mode_config.fb_modifiers_not_supported && +#else + if (dev->mode_config.allow_fb_modifiers && +#endif !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ret = convert_tiling_flags_to_modifier_gfx12(rfb); @@ -1288,6 +1336,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, drm_gem_object_get(rfb->base.obj[0]); rfb->base.obj[i] = rfb->base.obj[0]; } +#endif return 0; } @@ -1317,13 +1366,17 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags); if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED drm_gem_object_put(obj); +#endif return ERR_PTR(-EINVAL); } amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED drm_gem_object_put(obj); +#endif return ERR_PTR(-ENOMEM); } @@ -1331,11 +1384,16 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED drm_gem_object_put(obj); +#endif return ERR_PTR(ret); } +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED drm_gem_object_put(obj); +#endif + return &amdgpu_fb->base; } @@ -1474,7 +1532,11 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && - connector->display_info.is_hdmi && +#if 
defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) + connector && connector->display_info.is_hdmi && +#else + drm_detect_hdmi_monitor(to_amdgpu_connector(connector)->edid) && +#endif amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; @@ -1672,7 +1734,7 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, const struct drm_display_mode *mode) { struct drm_device *dev = crtc->dev; - unsigned int pipe = crtc->index; + unsigned int pipe = drm_crtc_index(crtc); return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, stime, etime, mode); @@ -1698,6 +1760,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_crtc *crtc; struct drm_connector *connector; + struct drm_connector_list_iter iter; int r; @@ -1709,6 +1772,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) drm_for_each_connector_iter(connector, &iter) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_connector_list_iter_end(&iter); drm_modeset_unlock_all(dev); /* unpin the front buffers and cursors */ @@ -1727,10 +1791,11 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) } } - if (!fb || !fb->obj[0]) + if (!fb || !drm_gem_fb_get_obj(fb, 0)) continue; - robj = gem_to_amdgpu_bo(fb->obj[0]); + robj = gem_to_amdgpu_bo(drm_gem_fb_get_obj(fb, 0)); + if (!amdgpu_display_robj_is_fb(adev, robj)) { r = amdgpu_bo_reserve(robj, true); if (r == 0) { @@ -1759,6 +1824,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) r = amdgpu_bo_reserve(aobj, true); if (r == 0) { + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); if (r != 0) dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8e81a83d37d84..640d344975db0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,6 +42,184 @@ #include #include +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +/** + * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation + * @obj: GEM BO + * @vma: Virtual memory area + * + * Sets up a userspace mapping of the BO's memory in the given + * virtual memory area. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + unsigned asize = amdgpu_bo_size(bo); + int ret; + + if (!vma->vm_file) + return -ENODEV; + + if (adev == NULL) + return -ENODEV; + + /* Check for valid size. 
*/ + if (asize < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || + (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { + return -EPERM; + } + vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT; + + /* prime mmap does not need to check access, so allow here */ + ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data); + if (ret) + return ret; + + ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev); + drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data); + + return ret; +} +#endif + +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) +#if defined(HAVE_DMA_BUF_OPS_LEGACY) +static int +__dma_resv_make_exclusive(struct dma_resv *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!dma_resv_shared_list(obj)) /* no shared fences to convert */ + return 0; + + r = dma_resv_get_fences(obj, DMA_RESV_USAGE_READ, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + dma_resv_add_fence(obj, fences[0], DMA_RESV_USAGE_WRITE); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + dma_resv_add_fence(obj, &array->base, DMA_RESV_USAGE_WRITE); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + +/** + * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation + * @dma_buf: Shared DMA buffer + * @attach: DMA-buf attachment + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + long r; + + r = drm_gem_map_attach(dma_buf, attach); + + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto error_detach; + + + if (attach->dev->driver != adev->dev->driver) { + /* + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. + */ + r = __dma_resv_make_exclusive(amdkcl_ttm_resvp(&bo->tbo)); + if (r) + goto error_unreserve; + } + + /* pin buffer into GTT */ + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto error_unreserve; + + +error_unreserve: + amdgpu_bo_unreserve(bo); + +error_detach: + if (r) + drm_gem_map_detach(dma_buf, attach); + return r; +} + +/** + * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation + * @dma_buf: Shared DMA buffer + * @attach: DMA-buf attachment + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * another device. For now, simply unpins the buffer from GTT. 
+ */ +static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + int ret = 0; + + ret = amdgpu_bo_reserve(bo, true); + if (unlikely(ret != 0)) + goto error; + + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + +error: + drm_gem_map_detach(dma_buf, attach); +} +#else /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -57,12 +235,15 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); +#ifdef HAVE_STRUCT_DMA_BUF_ATTACH_OPS_ALLOW_PEER2PEER if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; +#endif return 0; } +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN /** * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation * @@ -93,6 +274,7 @@ static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach) amdgpu_bo_unpin(bo); } +#endif /** * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation @@ -117,16 +299,19 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, struct sg_table *sgt; long r; +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN if (!bo->tbo.pin_count) { /* move buffer into GTT or VRAM */ struct ttm_operation_ctx ctx = { false, false }; unsigned int domains = AMDGPU_GEM_DOMAIN_GTT; +#ifdef HAVE_STRUCT_DMA_BUF_ATTACH_OPS_ALLOW_PEER2PEER if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && attach->peer2peer) { bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; domains |= AMDGPU_GEM_DOMAIN_VRAM; } +#endif amdgpu_bo_placement_from_domain(bo, domains); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) @@ -135,6 +320,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, } else if (bo->tbo.resource->mem_type != TTM_PL_TT) { return ERR_PTR(-EBUSY); } +#else + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return ERR_PTR(r); +#endif switch (bo->tbo.resource->mem_type) { case TTM_PL_TT: @@ -181,6 +371,12 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { +#ifndef HAVE_STRUCT_DMA_BUF_OPS_PIN + struct dma_buf *dma_buf = attach->dmabuf; + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); +#endif + if (sgt->sgl->page_link) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); @@ -188,7 +384,12 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, } else { amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt); } + +#ifndef HAVE_STRUCT_DMA_BUF_OPS_PIN + amdgpu_bo_unpin(bo); +#endif } +#endif /** * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation @@ -232,17 +433,30 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, } const struct dma_buf_ops amdgpu_dmabuf_ops = { +#if defined(HAVE_DMA_BUF_OPS_LEGACY) + .attach = amdgpu_dma_buf_map_attach, + .detach = amdgpu_dma_buf_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, +#else +#ifdef HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING + .dynamic_mapping = true, +#endif .attach = amdgpu_dma_buf_attach, +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN .pin = amdgpu_dma_buf_pin, .unpin = amdgpu_dma_buf_unpin, +#endif .map_dma_buf = amdgpu_dma_buf_map, .unmap_dma_buf = amdgpu_dma_buf_unmap, +#endif .release = drm_gem_dmabuf_release, 
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, }; +#endif /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation @@ -254,7 +468,12 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { * Returns: * Shared DMA buffer representing the GEM BO from the given device. */ +#ifdef HAVE_DRM_DRV_GEM_PRIME_EXPORT_PI struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, +#else +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, +#endif int flags) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); @@ -264,13 +483,140 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); +#ifdef HAVE_DRM_DRV_GEM_PRIME_EXPORT_PI buf = drm_gem_prime_export(gobj, flags); - if (!IS_ERR(buf)) +#else + buf = drm_gem_prime_export(dev, gobj, flags); +#endif + + if (!IS_ERR(buf)) { +#ifdef AMDKCL_DMA_BUF_SHARE_ADDR_SPACE + buf->file->f_mapping = gobj->dev->anon_inode->i_mapping; +#endif +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) buf->ops = &amdgpu_dmabuf_ops; +#endif + } return buf; } +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ +/** + * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation + * @obj: GEM BO + * + * Returns: + * The BO's reservation object. + */ +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdkcl_ttm_resvp(&bo->tbo); +} +#endif + +#if !defined(HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING) && \ + !defined(HAVE_STRUCT_DMA_BUF_OPS_PIN) +/** + * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table + * implementation + * @obj: GEM buffer object (BO) + * + * Returns: + * A scatter/gather table for the pinned pages of the BO's memory. + */ +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int npages = bo->tbo.ttm->num_pages; + + return drm_prime_pages_to_sg(obj->dev, bo->tbo.ttm->pages, npages); +} + +/** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Imports shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM BO of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. 
+ */ +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg) +{ + struct dma_resv *resv = attach->dmabuf->resv; + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; + int ret; + + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + dma_resv_lock(resv, NULL); + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + dma_resv_unlock(resv); + return &bo->tbo.base; + +error: + dma_resv_unlock(resv); + return ERR_PTR(ret); +} + +#ifdef AMDKCL_AMDGPU_DMABUF_OPS +/** + * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation + * @dev: DRM device + * @dma_buf: Shared DMA buffer + * + * The main work is done by the &drm_gem_prime_import helper, which in turn + * uses &amdgpu_gem_prime_import_sg_table. + * + * Returns: + * GEM BO representing the shared DMA buffer for the given device. + */ +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj; + + if (dma_buf->ops == &amdgpu_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev) { + /* + * Importing a dmabuf exported from our own gem increases + * the refcount on the gem itself instead of the f_count of the dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + return drm_gem_prime_import(dev, dma_buf); +} +#endif +#else /** * amdgpu_dma_buf_create_obj - create BO for DMA-buf import * @@ -322,6 +668,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) return ERR_PTR(ret); } +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN /** * amdgpu_dma_buf_move_notify - &attach.move_notify implementation * @@ -357,7 +704,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - struct dma_resv *resv = vm->root.bo->tbo.base.resv; + struct dma_resv *resv = amdkcl_ttm_resvp(&vm->root.bo->tbo); if (ticket) { /* When we get an error here it means that somebody @@ -393,10 +740,12 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) } static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = { +#ifdef HAVE_STRUCT_DMA_BUF_ATTACH_OPS_ALLOW_PEER2PEER .allow_peer2peer = true, +#endif .move_notify = amdgpu_dma_buf_move_notify }; - +#endif /** * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation * @dev: DRM device @@ -429,8 +778,12 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, if (IS_ERR(obj)) return obj; +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN attach = dma_buf_dynamic_attach(dma_buf, dev->dev, &amdgpu_dma_buf_attach_ops, obj); +#else + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, true); +#endif if (IS_ERR(attach)) { drm_gem_object_put(obj); return ERR_CAST(attach); @@ -440,6 +793,50 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, obj->import_attach = attach; return obj; } +#endif + +#ifndef AMDKCL_AMDGPU_DMABUF_OPS +int amdgpu_gem_prime_pin(struct drm_gem_object *obj) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + long ret = 0; + + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret != 0)) + return 
ret; + + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + ret = dma_resv_wait_timeout(bo->tbo.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(ret < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); + amdgpu_bo_unreserve(bo); + return ret; + } + + /* pin buffer into GTT */ + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + + amdgpu_bo_unreserve(bo); + return ret; +} + +void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int ret = 0; + + ret = amdgpu_bo_reserve(bo, true); + if (unlikely(ret != 0)) + return; + + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); +} +#endif /** * amdgpu_dmabuf_is_xgmi_accessible - Check if xgmi available for P2P transfer @@ -459,9 +856,11 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) if (dma_buf->ops != &amdgpu_dmabuf_ops) /* No XGMI with non AMD GPUs */ return false; +#endif gobj = dma_buf->priv; bo = gem_to_amdgpu_bo(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index 3e93b9b407a97..dbc9384febd43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -25,12 +25,40 @@ #include +#if !defined(HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING) && \ + !defined(HAVE_STRUCT_DMA_BUF_OPS_PIN) +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); +#endif + +#ifdef HAVE_DRM_DRV_GEM_PRIME_EXPORT_PI struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, +#else +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, +#endif int flags); +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); +#else +int amdgpu_gem_prime_pin(struct drm_gem_object *obj); +void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); +#endif bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct amdgpu_bo *bo); +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +#endif + +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); +#endif + extern const struct dma_buf_ops amdgpu_dmabuf_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 094498a0964b5..41e9bf206f7eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -117,9 +117,10 @@ * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support + * - 3.59.0 - Cleared VRAM */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 58 +#define KMS_DRIVER_MINOR 59 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -131,6 +132,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), + AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -160,6 +162,8 @@ int amdgpu_exp_hw_support; int 
amdgpu_dc = -1; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; +int amdgpu_no_evict; +int amdgpu_direct_gma_size; uint amdgpu_pcie_gen_cap; uint amdgpu_pcie_lane_cap; u64 amdgpu_cg_mask = 0xffffffffffffffff; @@ -168,6 +172,16 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; + +/* Specifies the default granularity for SVM, used in buffer + * migration and restoration of backing memory when handling + * recoverable page faults. + * + * The value is given as log(numPages(buffer)); for a 2 MiB + * buffer it computes to be 9 + */ +uint amdgpu_svm_default_granularity = 9; + /* * OverDrive(bit 14) disabled by default * GFX DCS(bit 19) disabled by default @@ -219,8 +233,6 @@ int amdgpu_wbrf = -1; int amdgpu_damage_clips = -1; /* auto */ int amdgpu_umsch_mm_fwlog; -static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); - DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, "DRM_UT_CORE", "DRM_UT_DRIVER", @@ -235,9 +247,6 @@ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), - .delayed_reset_work = __DELAYED_WORK_INITIALIZER( - mgpu_info.delayed_reset_work, - amdgpu_drv_delayed_reset_work_handler, 0), }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; @@ -319,6 +328,13 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); +/** + * DOC: svm_default_granularity (uint) + * Used in buffer migration and handling of recoverable page faults + */ +MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB"); +module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644); + /** * DOC: lockup_timeout (string) * Set GPU scheduler timeout value in ms. @@ -473,6 +489,12 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444); +MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); +module_param_named(no_evict, amdgpu_no_evict, int, 0444); + +MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)"); +module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444); + /** * DOC: forcelongtraining (uint) * Force long memory training in resume. 
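(Editor's illustration of the svm_default_granularity encoding documented above; this sketch is not part of the patch. The parameter stores the log2 of an SVM range's size in pages, so assuming 4 KiB pages:

	/* default of 9: 1 << 9 = 512 pages, and 512 * 4 KiB = 2 MiB per range */
	unsigned long svm_range_bytes = (1UL << amdgpu_svm_default_granularity) * PAGE_SIZE;

Booting with amdgpu.svm_default_granularity=10 would accordingly double the range size to 4 MiB.)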
@@ -599,14 +621,13 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); */ #ifdef CONFIG_DRM_AMDGPU_SI -#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) +#if (0 && (IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE))) int amdgpu_si_support; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_si_support = 1; MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)"); #endif - module_param_named(si_support, amdgpu_si_support, int, 0444); #endif @@ -618,14 +639,13 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); */ #ifdef CONFIG_DRM_AMDGPU_CIK -#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) +#if (0 && (IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE))) int amdgpu_cik_support; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_cik_support = 1; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)"); #endif - module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif @@ -827,6 +847,22 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa int amdgpu_no_queue_eviction_on_vm_fault; MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); + +/** + * DOC: priv_cp_queues (int) + * Enable privileged mode for CP queues. Default value: 0 (off) + */ +int priv_cp_queues; +module_param(priv_cp_queues, int, 0644); +MODULE_PARM_DESC(priv_cp_queues, "Enable privileged mode for CP queues (0 = off (default), 1 = on)"); + +/** + * DOC: keep_idle_process_evicted (bool) + * Keep an evicted process evicted if it is idle. Default value: false (off) + */ +bool keep_idle_process_evicted; +module_param(keep_idle_process_evicted, bool, 0444); +MODULE_PARM_DESC(keep_idle_process_evicted, "Restore evicted process only if queues are active (N = off(default), Y = on)"); #endif /** @@ -840,11 +876,9 @@ module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); * DOC: pcie_p2p (bool) * Enable PCIe P2P (requires large-BAR). Default value: true (on) */ -#ifdef CONFIG_HSA_AMD_P2P bool pcie_p2p = true; module_param(pcie_p2p, bool, 0444); MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). 
(N = off, Y = on(default))"); -#endif /** * DOC: dcfeaturemask (uint) @@ -2136,6 +2170,10 @@ static const struct pci_device_id pciidlist[] = { .class_mask = 0xffffff, .driver_data = CHIP_IP_DISCOVERY }, +#ifdef HAVE_DRM_AMDGPU_PCIID_H +#include +#endif + {0, 0, 0} }; @@ -2149,7 +2187,7 @@ static const struct amdgpu_asic_type_quirk asic_type_quirks[] = { {0x67FF, 0xF7, CHIP_POLARIS10}, }; -static const struct drm_driver amdgpu_kms_driver; +static struct drm_driver amdgpu_kms_driver; static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) { @@ -2199,6 +2237,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: enable RAS ACA\n"); adev->debug_enable_ras_aca = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) { + pr_info("debug: enable experimental reset features\n"); + adev->debug_exp_resets = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2232,8 +2275,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } - if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev)) - amdgpu_aspm = 0; + if (flags == 0) { + DRM_INFO("Unsupported asic. Remove me when IP discovery init is in place.\n"); + return -ENODEV; + } if (amdgpu_virtual_display || amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) @@ -2301,13 +2346,25 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, adev->pdev = pdev; ddev = adev_to_drm(adev); + /* Check and increase the vma range */ + kcl_drm_vma_offset_manager_adjust(ddev->vma_offset_manager); + if (!supports_atomic) ddev->driver_features &= ~DRIVER_ATOMIC; + kcl_pci_create_measure_file(pdev); + kcl_pci_configure_extended_tags(pdev); ret = pci_enable_device(pdev); if (ret) +#ifndef AMDKCL_DEVM_DRM_DEV_ALLOC return ret; +#else + goto err_free; +#endif +#ifdef HAVE_DRM_DEVICE_PDEV + ddev->pdev = pdev; +#endif pci_set_drvdata(pdev, ddev); amdgpu_init_debug_options(adev); @@ -2400,6 +2457,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, err_pci: pci_disable_device(pdev); +#ifdef AMDKCL_DEVM_DRM_DEV_ALLOC +err_free: + amdkcl_drm_dev_release(ddev); +#endif return ret; } @@ -2410,8 +2471,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); amdgpu_xcp_dev_unplug(adev); + amdgpu_gmc_prepare_nps_mode_change(adev); drm_dev_unplug(dev); - if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); @@ -2424,10 +2485,24 @@ amdgpu_pci_remove(struct pci_dev *pdev) * Clear the Bus Master Enable bit and then wait on the PCIe Device * StatusTransactions Pending bit. */ + kcl_pci_remove_measure_file(pdev); pci_disable_device(pdev); pci_wait_for_pending_transaction(pdev); +#ifdef AMDKCL_DEVM_DRM_DEV_ALLOC + amdkcl_drm_dev_release(dev); +#endif } +#ifndef HAVE_DRM_DRM_MANAGED_H +static void amdgpu_driver_release(struct drm_device *ddev) +{ + struct amdgpu_device *adev = drm_to_adev(ddev); + + drm_dev_fini(ddev); + kfree(adev); +} +#endif + static void amdgpu_pci_shutdown(struct pci_dev *pdev) { @@ -2448,82 +2523,6 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) adev->mp1_state = PP_MP1_STATE_NONE; } -/** - * amdgpu_drv_delayed_reset_work_handler - work handler for reset - * - * @work: work_struct. 
- */ -static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) -{ - struct list_head device_list; - struct amdgpu_device *adev; - int i, r; - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - mutex_lock(&mgpu_info.mutex); - if (mgpu_info.pending_reset == true) { - mutex_unlock(&mgpu_info.mutex); - return; - } - mgpu_info.pending_reset = true; - mutex_unlock(&mgpu_info.mutex); - - /* Use a common context, just need to make sure full reset is done */ - reset_context.method = AMD_RESET_METHOD_NONE; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - reset_context.reset_req_dev = adev; - r = amdgpu_device_pre_asic_reset(adev, &reset_context); - if (r) { - dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", - r, adev_to_drm(adev)->unique); - } - if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work)) - r = -EALREADY; - } - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - flush_work(&adev->xgmi_reset_work); - adev->gmc.xgmi.pending_reset = false; - } - - /* reset function will rebuild the xgmi hive info , clear it now */ - for (i = 0; i < mgpu_info.num_dgpu; i++) - amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev); - - INIT_LIST_HEAD(&device_list); - - for (i = 0; i < mgpu_info.num_dgpu; i++) - list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list); - - /* unregister the GPU first, reset function will add them back */ - list_for_each_entry(adev, &device_list, reset_list) - amdgpu_unregister_gpu_instance(adev); - - /* Use a common context, just need to make sure full reset is done */ - set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); - r = amdgpu_do_asic_reset(&device_list, &reset_context); - - if (r) { - DRM_ERROR("reinit gpus failure"); - return; - } - for (i = 0; i < mgpu_info.num_dgpu; i++) { - adev = mgpu_info.gpu_ins[i].adev; - if (!adev->kfd.init_complete) { - kgd2kfd_init_zone_device(adev); - amdgpu_amdkfd_device_init(adev); - amdgpu_amdkfd_drm_client_create(adev); - } - amdgpu_ttm_set_buffer_funcs_status(adev, true); - } -} - static int amdgpu_pmops_prepare(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); @@ -2899,7 +2898,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .flush = amdgpu_flush, .release = drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, - .mmap = drm_gem_mmap, + .mmap = amdkcl_drm_gem_mmap, .poll = drm_poll, .read = drm_read, #ifdef CONFIG_COMPAT @@ -2943,28 +2942,91 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_DGMA, amdgpu_gem_dgma_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW) }; -static const struct drm_driver amdgpu_kms_driver = { +static struct drm_driver amdgpu_kms_driver = { .driver_features = - DRIVER_ATOMIC | - DRIVER_GEM | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | - DRIVER_SYNCOBJ_TIMELINE, + DRIVER_ATOMIC + | DRIVER_HAVE_IRQ +#ifdef HAVE_DRM_DRV_DRIVER_IRQ_SHARED + | DRIVER_IRQ_SHARED +#endif /* HAVE_DRM_DRV_DRIVER_IRQ_SHARED */ +#ifdef HAVE_DRM_DRV_DRIVER_PRIME + 
| DRIVER_PRIME +#endif /* HAVE_DRM_DRV_DRIVER_PRIME */ + | DRIVER_GEM + | DRIVER_RENDER | DRIVER_MODESET + | DRIVER_SYNCOBJ +#ifdef HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE + | DRIVER_SYNCOBJ_TIMELINE +#endif /* HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE */ + , .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, +#if defined(CONFIG_DEBUG_FS) +#if defined(AMDKCL_AMDGPU_DEBUGFS_CLEANUP) + .debugfs_cleanup = amdgpu_debugfs_cleanup, +#endif +#endif + +#ifndef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP + .get_vblank_counter = kcl_amdgpu_get_vblank_counter_kms, + .enable_vblank = kcl_amdgpu_enable_vblank_kms, + .disable_vblank = kcl_amdgpu_disable_vblank_kms, + .get_vblank_timestamp = kcl_amdgpu_get_vblank_timestamp_kms, + .get_scanout_position = kcl_amdgpu_get_crtc_scanout_position, +#endif +#ifdef CONFIG_DRM_LEGACY + .irq_handler = amdgpu_irq_handler, +#endif .ioctls = amdgpu_ioctls_kms, +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK + .gem_free_object_unlocked = amdgpu_gem_object_free, + .gem_open_object = amdgpu_gem_object_open, + .gem_close_object = amdgpu_gem_object_close, +#endif .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, +#ifdef HAVE_DRM_DRIVER_SHOW_FDINFO #ifdef CONFIG_PROC_FS .show_fdinfo = amdgpu_show_fdinfo, #endif +#endif + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK + .gem_prime_export = amdgpu_gem_prime_export, +#endif +#if defined(AMDKCL_AMDGPU_DMABUF_OPS) .gem_prime_import = amdgpu_gem_prime_import, +#else + .gem_prime_import = drm_gem_prime_import, + .gem_prime_pin = amdgpu_gem_prime_pin, + .gem_prime_unpin = amdgpu_gem_prime_unpin, +#endif +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ + .gem_prime_res_obj = amdgpu_gem_prime_res_obj, +#endif +#if defined(HAVE_DMA_BUF_OPS_LEGACY) + .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, + .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, +#endif + +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK + .gem_prime_vmap = amdgpu_gem_prime_vmap, + .gem_prime_vunmap = amdgpu_gem_prime_vunmap, +#endif + +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_MMAP + .gem_prime_mmap = amdkcl_drm_gem_prime_mmap, +#endif .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -2976,8 +3038,11 @@ static const struct drm_driver amdgpu_kms_driver = { const struct drm_driver amdgpu_partition_driver = { .driver_features = - DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ | - DRIVER_SYNCOBJ_TIMELINE, + DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ +#ifdef HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE + | DRIVER_SYNCOBJ_TIMELINE +#endif /* HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE */ + , .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, @@ -2988,7 +3053,12 @@ const struct drm_driver amdgpu_partition_driver = { .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_MMAP + .gem_prime_mmap = drm_gem_prime_mmap, +#endif .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -3005,12 +3075,14 @@ static struct pci_error_handlers amdgpu_pci_err_handler = { .resume = amdgpu_pci_resume, }; +#ifdef 
HAVE_PCI_DRIVER_DEV_GROUPS static const struct attribute_group *amdgpu_sysfs_groups[] = { &amdgpu_vram_mgr_attr_group, &amdgpu_gtt_mgr_attr_group, &amdgpu_flash_attr_group, NULL, }; +#endif static struct pci_driver amdgpu_kms_pci_driver = { .name = DRIVER_NAME, @@ -3020,7 +3092,9 @@ static struct pci_driver amdgpu_kms_pci_driver = { .shutdown = amdgpu_pci_shutdown, .driver.pm = &amdgpu_pm_ops, .err_handler = &amdgpu_pci_err_handler, +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS .dev_groups = amdgpu_sysfs_groups, +#endif }; static int __init amdgpu_init(void) @@ -3039,6 +3113,10 @@ static int __init amdgpu_init(void) goto error_fence; DRM_INFO("amdgpu kernel modesetting enabled.\n"); + + DRM_INFO("amdgpu version: %s\n", AMDGPU_VERSION); + DRM_INFO("OS DRM version: %d.%d.%d\n", DRM_VER, DRM_PATCH, DRM_SUB); + amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); @@ -3060,10 +3138,14 @@ static void __exit amdgpu_exit(void) amdgpu_amdkfd_fini(); pci_unregister_driver(&amdgpu_kms_pci_driver); amdgpu_unregister_atpx_handler(); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV amdgpu_acpi_release(); +#endif amdgpu_sync_fini(); amdgpu_fence_slab_fini(); +#ifdef HAVE_MMU_NOTIFIER_SYNCHRONIZE mmu_notifier_synchronize(); +#endif amdgpu_xcp_drv_release(); } @@ -3073,3 +3155,4 @@ module_exit(amdgpu_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL and additional rights"); +MODULE_VERSION(AMDGPU_VERSION); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 3aaeed2d35620..dbd12456ff5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -222,7 +222,11 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, case DRM_MODE_CONNECTOR_HDMIB: if (amdgpu_connector->use_digital) { /* HDMI 1.3 supports up to 340 Mhz over single link */ +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif if (pixel_clock > 340000) return true; else @@ -244,7 +248,11 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, return false; else { /* HDMI 1.3 supports up to 340 Mhz over single link */ +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) if (connector->display_info.is_hdmi) { +#else + if (drm_detect_hdmi_monitor(amdgpu_connector->edid)) { +#endif if (pixel_clock > 340000) return true; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index c7df7fa3459f1..fd5c979d54479 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -102,8 +102,13 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL); for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { +#ifdef HAVE_KTIME_IS_UNION + if (!usage[hw_ip].tv64) + continue; +#else if (!usage[hw_ip]) continue; +#endif drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip], ktime_to_ns(usage[hw_ip])); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2f24a6aa13bf6..d4f3fb3519c81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -759,7 +759,6 @@ void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error) */ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) { - amdgpu_fence_driver_set_error(ring, 
-ECANCELED); amdgpu_fence_write(ring, ring->fence_drv.sync_seq); amdgpu_fence_process(ring); } @@ -878,6 +877,7 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, + AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL .release = amdgpu_fence_release, }; @@ -885,6 +885,7 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_job_fence_get_timeline_name, .enable_signaling = amdgpu_job_fence_enable_signaling, + AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL .release = amdgpu_job_fence_release, }; @@ -940,9 +941,9 @@ static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused) * * Manually trigger a gpu reset at the next fence wait. */ -static int gpu_recover_get(void *data, u64 *val) +static int amdgpu_debugfs_gpu_recover_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)data; + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct drm_device *dev = adev_to_drm(adev); int r; @@ -955,7 +956,7 @@ static int gpu_recover_get(void *data, u64 *val) if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work)) flush_work(&adev->reset_work); - *val = atomic_read(&adev->reset_domain->reset_res); + seq_printf(m, "%d\n", atomic_read(&adev->reset_domain->reset_res)); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -964,8 +965,7 @@ static int gpu_recover_get(void *data, u64 *val) } DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info); -DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL, - "%lld\n"); +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_gpu_recover); static void amdgpu_debugfs_reset_work(struct work_struct *work) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index bc58dca18035a..98f3196599ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -32,7 +32,7 @@ struct amdgpu_fru_info { char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; char manufacturer_name[32]; - char fru_id[32]; + char fru_id[50]; }; int amdgpu_fru_get_product_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8283d682f543b..7cc980bf4725d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index aad2027e5c7cb..55f80f94e3926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -43,11 +43,20 @@ #include "amdgpu_hmm.h" #include "amdgpu_xgmi.h" +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +void amdgpu_gem_object_free(struct 
drm_gem_object *gobj) +#else static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; +#ifndef HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG +static vm_fault_t amdgpu_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#else static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { - struct ttm_buffer_object *bo = vmf->vma->vm_private_data; + struct vm_area_struct *vma = vmf->vma; +#endif + struct ttm_buffer_object *bo = vma->vm_private_data; struct drm_device *ddev = bo->base.dev; vm_fault_t ret; int idx; @@ -63,18 +72,18 @@ static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) goto unlock; } - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { - ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + ret = ttm_bo_vm_dummy_page(vmf, vma->vm_page_prot); } if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; unlock: - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); return ret; } @@ -86,10 +95,26 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = { }; static void amdgpu_gem_object_free(struct drm_gem_object *gobj) +#endif { struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); + struct amdgpu_device *adev = amdgpu_ttm_adev(robj->tbo.bdev); if (robj) { + if (robj->flags & AMDGPU_GEM_CREATE_NO_EVICT) { + if (!amdgpu_bo_reserve(robj, false)) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } + } + + if (robj->tbo.resource && robj->tbo.resource->mem_type == AMDGPU_PL_DGMA) + atomic64_sub(amdgpu_bo_size(robj), + &adev->direct_gma.vram_usage); + else if (robj->tbo.resource && robj->tbo.resource->mem_type == AMDGPU_PL_DGMA_IMPORT) + atomic64_sub(amdgpu_bo_size(robj), + &adev->direct_gma.gart_usage); + amdgpu_hmm_unregister(robj); amdgpu_bo_unref(&robj); } @@ -104,12 +129,30 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct amdgpu_bo *bo; struct amdgpu_bo_user *ubo; struct amdgpu_bo_param bp; + unsigned long max_size; int r; memset(&bp, 0, sizeof(bp)); *obj = NULL; flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + if ((initial_domain & AMDGPU_GEM_DOMAIN_DGMA) || + (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT)) { + flags |= AMDGPU_GEM_CREATE_NO_EVICT; + max_size = (unsigned long)amdgpu_direct_gma_size << 20; + + if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA) + max_size -= atomic64_read(&adev->direct_gma.vram_usage); + else if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) + max_size -= atomic64_read(&adev->direct_gma.gart_usage); + + if (size > max_size) { + DRM_DEBUG("Allocation size %luMB bigger than %luMB limit\n", + size >> 20, max_size >> 20); + return -ENOMEM; + } + } + bp.size = size; bp.byte_align = alignment; bp.type = type; @@ -126,7 +169,14 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bo = &ubo->bo; *obj = &bo->tbo.base; +#ifndef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK (*obj)->funcs = &amdgpu_gem_object_funcs; +#endif + + if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA) + atomic64_add(size, &adev->direct_gma.vram_usage); + else if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) + atomic64_add(size, &adev->direct_gma.gart_usage); return 0; } @@ -159,8 +209,13 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) * Call from drm_gem_handle_create which appear in both new and open ioctl * case. 
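+ * When HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK is defined, the open/close hooks below are built non-static so that they can be wired up outside of drm_gem_object_funcs.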
*/ +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +int amdgpu_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv) +#else static int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv) +#endif { struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -224,8 +279,13 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, return r; } +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) +#else static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv) +#endif { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -274,6 +334,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, drm_exec_fini(&exec); } +#ifndef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -294,17 +355,28 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str return drm_gem_ttm_mmap(obj, vma); } +#endif +#ifndef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { .free = amdgpu_gem_object_free, .open = amdgpu_gem_object_open, .close = amdgpu_gem_object_close, .export = amdgpu_gem_prime_export, +#ifdef HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG .vmap = drm_gem_ttm_vmap, .vunmap = drm_gem_ttm_vunmap, +#elif defined(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_2ARGS) + .vmap = amdgpu_drm_gem_ttm_vmap, + .vunmap = amdgpu_drm_gem_ttm_vunmap, +#else + .vmap = amdgpu_gem_prime_vmap, + .vunmap = amdgpu_gem_prime_vunmap, +#endif .mmap = amdgpu_gem_object_mmap, .vm_ops = &amdgpu_gem_vm_ops, }; +#endif /* * GEM ioctls. 
*/
@@ -336,6 +408,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_EXPLICIT_SYNC | AMDGPU_GEM_CREATE_ENCRYPTED | AMDGPU_GEM_CREATE_GFX12_DCC | + AMDGPU_GEM_CREATE_NO_EVICT | AMDGPU_GEM_CREATE_DISCARDABLE)) return -EINVAL; @@ -366,7 +439,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (r) return r; - resv = vm->root.bo->tbo.base.resv; + resv = amdkcl_ttm_resvp(&vm->root.bo->tbo); } initial_domain = (u32)(0xffffffff & args->in.domains); @@ -476,14 +549,28 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, } r = drm_gem_handle_create(filp, gobj, &handle); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED if (r) goto user_pages_done; args->handle = handle; +#else + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(gobj); + if (r) + return r; + + args->handle = handle; + return 0; +#endif user_pages_done: +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); +#else + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +#endif release_object: drm_gem_object_put(gobj); @@ -491,6 +578,62 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return r; } +int amdgpu_gem_dgma_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + struct drm_amdgpu_gem_dgma *args = data; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct drm_gem_object *gobj; + struct amdgpu_bo *abo; + dma_addr_t *dma_addr; + uint32_t handle; + int i, r = 0; + + switch (args->op) { + case AMDGPU_GEM_DGMA_IMPORT: + /* create a gem object to contain this object in */ + r = amdgpu_gem_object_create(adev, args->size, 0, + AMDGPU_GEM_DOMAIN_DGMA_IMPORT, 0, + 0, NULL, &gobj, fpriv->xcp_id + 1); + if (r) + return r; + + abo = gem_to_amdgpu_bo(gobj); + dma_addr = kmalloc_array(PFN_UP(abo->tbo.resource->size), sizeof(dma_addr_t), GFP_KERNEL); + if (unlikely(dma_addr == NULL)) { + r = -ENOMEM; + goto release_object; + } + + for (i = 0; i < PFN_UP(abo->tbo.resource->size); i++) + dma_addr[i] = args->addr + i * PAGE_SIZE; + abo->dgma_import_base = args->addr; + abo->dgma_addr = (void *)dma_addr; + r = drm_gem_handle_create(filp, gobj, &handle); + args->handle = handle; + break; + case AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR: + gobj = drm_gem_object_lookup(filp, args->handle); + if (gobj == NULL) + return -ENOENT; + + abo = gem_to_amdgpu_bo(gobj); + if (abo->tbo.resource->mem_type != AMDGPU_PL_DGMA) { + r = -EINVAL; + goto release_object; + } + args->addr = amdgpu_bo_gpu_offset(abo) - + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM) + + adev->gmc.aper_base; + break; + default: + return -EINVAL; + } + +release_object: + drm_gem_object_put(gobj); + return r; +} + int amdgpu_mode_dumb_mmap(struct drm_file *filp, struct drm_device *dev, uint32_t handle, uint64_t *offset_p) @@ -567,7 +710,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + ret = dma_resv_wait_timeout(amdkcl_ttm_resvp(&robj->tbo), DMA_RESV_USAGE_READ, true, timeout); /* ret == 0 means not signaled, @@ -639,10 +782,11 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, * vital here, so they are not reported back to userspace. 
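+ * For compute contexts the update below additionally waits for the page table fences and flushes the compute TLB.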
*/ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, + struct amdgpu_fpriv *fpriv, struct amdgpu_bo_va *bo_va, uint32_t operation) { + struct amdgpu_vm *vm = &fpriv->vm; int r; if (!amdgpu_vm_ready(vm)) @@ -660,6 +804,25 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, } r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) + goto error; + + if (vm->is_compute_context) { + if (bo_va->last_pt_update) + r = dma_fence_wait(bo_va->last_pt_update, true); + if (!r && vm->last_update) + r = dma_fence_wait(vm->last_update, true); + if (!r) { + uint32_t xcc_mask = (!adev->xcp_mgr || + fpriv->xcp_id == ~0) ? 1 : + adev->xcp_mgr->xcp[fpriv->xcp_id] + .ip[AMDGPU_XCP_GFX].inst_mask; + + r = amdgpu_vm_flush_compute_tlb(adev, vm, + TLB_FLUSH_LEGACY, + xcc_mask); + } + } error: if (r && r != -ERESTARTSYS) @@ -827,7 +990,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + amdgpu_gem_va_update_vm(adev, fpriv, bo_va, args->operation); error: @@ -963,6 +1126,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, args->pitch = amdgpu_gem_align_pitch(adev, args->width, DIV_ROUND_UP(args->bpp, 8), 0); + args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_domain(adev, @@ -1008,7 +1172,12 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) */ rcu_read_lock(); pid = rcu_dereference(file->pid); - task = pid_task(pid, PIDTYPE_TGID); + task = pid_task(pid, +#ifdef HAVE_PIDTYPE_TGID + PIDTYPE_TGID); +#else + PIDTYPE_PID); +#endif seq_printf(m, "pid %8d command %s:\n", pid_nr(pid), task ? 
task->comm : ""); rcu_read_unlock(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9be8cafdcecc9..8c9fcfb7f22e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -24,10 +24,13 @@ */ #include +#include + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" @@ -84,16 +87,6 @@ int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, return bit; } -void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, - int *me, int *pipe, int *queue) -{ - *queue = bit % adev->gfx.me.num_queue_per_pipe; - *pipe = (bit / adev->gfx.me.num_queue_per_pipe) - % adev->gfx.me.num_pipe_per_me; - *me = (bit / adev->gfx.me.num_queue_per_pipe) - / adev->gfx.me.num_pipe_per_me; -} - bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue) { @@ -412,7 +405,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, } /* prepare MQD backup */ - kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL); + kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL); if (!kiq->mqd_backup) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); @@ -435,7 +428,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring->mqd_size = mqd_size; /* prepare MQD backup */ - adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL); if (!adev->gfx.me.mqd_backup[i]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); return -ENOMEM; @@ -459,7 +452,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring->mqd_size = mqd_size; /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL); + adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL); if (!adev->gfx.mec.mqd_backup[j]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); return -ENOMEM; @@ -657,7 +650,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) uint64_t queue_mask = 0; int r, i, j; - if (adev->enable_mes) + if (adev->mes.enable_legacy_queue_map) return amdgpu_gfx_mes_enable_kcq(adev, xcc_id); if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) @@ -719,7 +712,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); - if (adev->enable_mes) { + if (adev->mes.enable_legacy_queue_map) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { j = i + xcc_id * adev->gfx.num_gfx_rings; r = amdgpu_mes_map_legacy_queue(adev, @@ -892,6 +885,9 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (r) return r; + if (amdgpu_sriov_vf(adev)) + return r; + if (adev->gfx.cp_ecc_error_irq.funcs) { r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) @@ -1360,37 +1356,248 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, return count; } +static const char *xcp_desc[] = { + [AMDGPU_SPX_PARTITION_MODE] = "SPX", + [AMDGPU_DPX_PARTITION_MODE] = "DPX", + [AMDGPU_TPX_PARTITION_MODE] = "TPX", + [AMDGPU_QPX_PARTITION_MODE] = "QPX", + [AMDGPU_CPX_PARTITION_MODE] = "CPX", +}; + static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, struct device_attribute *addr, char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - char 
*supported_partition; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + int size = 0, mode; + char *sep = ""; - /* TBD */ - switch (NUM_XCC(adev->gfx.xcc_mask)) { - case 8: - supported_partition = "SPX, DPX, QPX, CPX"; - break; - case 6: - supported_partition = "SPX, TPX, CPX"; - break; - case 4: - supported_partition = "SPX, DPX, CPX"; - break; - /* this seems only existing in emulation phase */ - case 2: - supported_partition = "SPX, CPX"; - break; - default: - supported_partition = "Not supported"; - break; + if (!xcp_mgr || !xcp_mgr->avail_xcp_modes) + return sysfs_emit(buf, "Not supported\n"); + + for_each_inst(mode, xcp_mgr->avail_xcp_modes) { + size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]); + sep = ", "; + } + + size += sysfs_emit_at(buf, size, "\n"); + + return size; +} + +static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct drm_gpu_scheduler *sched = &ring->sched; + struct drm_sched_entity entity; + struct dma_fence *f; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int i, r; + + /* Initialize the scheduler entity */ + r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + dev_err(adev->dev, "Failed setting up GFX kernel entity.\n"); + goto err; + } + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, + 64, 0, + &job); + if (r) + goto err; + + job->enforce_isolation = true; + + ib = &job->ibs[0]; + for (i = 0; i <= ring->funcs->align_mask; ++i) + ib->ptr[i] = ring->funcs->nop; + ib->length_dw = ring->funcs->align_mask + 1; + + f = amdgpu_job_submit(job); + + r = dma_fence_wait(f, false); + if (r) + goto err; + + dma_fence_put(f); + + /* Clean up the scheduler entity */ + drm_sched_entity_destroy(&entity); + return 0; + +err: + return r; +} + +static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) +{ + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + struct amdgpu_ring *ring; + int num_xcc_to_clear; + int i, r, xcc_id; + + if (adev->gfx.num_xcc_per_xcp) + num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; + else + num_xcc_to_clear = 1; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + if ((ring->xcp_id == xcp_id) && ring->sched.ready) { + r = amdgpu_gfx_run_cleaner_shader_job(ring); + if (r) + return r; + num_xcc_to_clear--; + break; + } + } + } + + if (num_xcc_to_clear) + return -ENOENT; + + return 0; +} + +static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + long value; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtol(buf, 0, &value); + + if (ret) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + if (adev->xcp_mgr) { + if (value >= adev->xcp_mgr->num_xcps) + return -EINVAL; + } else { + if (value > 1) + return -EINVAL; } - return sysfs_emit(buf, "%s\n", supported_partition); + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = amdgpu_gfx_run_cleaner_shader(adev, value); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + +static ssize_t 
amdgpu_gfx_get_enforce_isolation(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int i; + ssize_t size = 0; + + if (adev->xcp_mgr) { + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); + if (i < (adev->xcp_mgr->num_xcps - 1)) + size += sysfs_emit_at(buf, size, " "); + } + buf[size++] = '\n'; + } else { + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); + } + + return size; } +static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + long partition_values[MAX_XCP] = {0}; + int ret, i, num_partitions; + const char *input_buf = buf; + + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + ret = sscanf(input_buf, "%ld", &partition_values[i]); + if (ret <= 0) + break; + + /* Move the pointer to the next value in the string */ + input_buf = strchr(input_buf, ' '); + if (input_buf) { + input_buf++; + } else { + i++; + break; + } + } + num_partitions = i; + + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) + return -EINVAL; + + if (!adev->xcp_mgr && num_partitions != 1) + return -EINVAL; + + for (i = 0; i < num_partitions; i++) { + if (partition_values[i] != 0 && partition_values[i] != 1) + return -EINVAL; + } + + mutex_lock(&adev->enforce_isolation_mutex); + + for (i = 0; i < num_partitions; i++) { + if (adev->enforce_isolation[i] && !partition_values[i]) { + /* Going from enabled to disabled */ + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + } else if (!adev->enforce_isolation[i] && partition_values[i]) { + /* Going from disabled to enabled */ + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } + adev->enforce_isolation[i] = partition_values[i]; + } + + mutex_unlock(&adev->enforce_isolation_mutex); + + return count; +} + +static DEVICE_ATTR(run_cleaner_shader, 0200, + NULL, amdgpu_gfx_set_run_cleaner_shader); + +static DEVICE_ATTR(enforce_isolation, 0644, + amdgpu_gfx_get_enforce_isolation, + amdgpu_gfx_set_enforce_isolation); + static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); @@ -1400,19 +1607,316 @@ static DEVICE_ATTR(available_compute_partition, 0444, int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) { + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + bool xcp_switch_supported; int r; + if (!xcp_mgr) + return 0; + + xcp_switch_supported = + (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); + + if (!xcp_switch_supported) + dev_attr_current_compute_partition.attr.mode &= + ~(S_IWUSR | S_IWGRP | S_IWOTH); + r = device_create_file(adev->dev, &dev_attr_current_compute_partition); if (r) return r; - r = device_create_file(adev->dev, &dev_attr_available_compute_partition); + if (xcp_switch_supported) + r = device_create_file(adev->dev, + &dev_attr_available_compute_partition); return r; } void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + bool xcp_switch_supported; + + if (!xcp_mgr) + return; + + xcp_switch_supported = + (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode); device_remove_file(adev->dev, &dev_attr_current_compute_partition); - device_remove_file(adev->dev, &dev_attr_available_compute_partition); + + 
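+	/* available_compute_partition is only created when partition switching is supported */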
if (xcp_switch_supported) + device_remove_file(adev->dev, + &dev_attr_available_compute_partition); +} + +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +{ + int r; + + r = device_create_file(adev->dev, &dev_attr_enforce_isolation); + if (r) + return r; + + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); + if (r) + return r; + + return 0; +} + +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size) +{ + if (!adev->gfx.enable_cleaner_shader) + return -EOPNOTSUPP; + + return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) + memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, + cleaner_shader_size); +} + +/** + * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) + * @adev: amdgpu_device pointer + * @idx: Index of the scheduler to control + * @enable: Whether to enable or disable the KFD scheduler + * + * This function is used to control the KFD (Kernel Fusion Driver) scheduler + * from the KGD. It is part of the cleaner shader feature. This function plays + * a key role in enforcing process isolation on the GPU. + * + * The function uses a reference count mechanism (kfd_sch_req_count) to keep + * track of the number of requests to enable the KFD scheduler. When a request + * to enable the KFD scheduler is made, the reference count is decremented. + * When the reference count reaches zero, a delayed work is scheduled to + * enforce isolation after a delay of GFX_SLICE_PERIOD. + * + * When a request to disable the KFD scheduler is made, the function first + * checks if the reference count is zero. If it is, it cancels the delayed work + * for enforcing isolation and checks if the KFD scheduler is active. If the + * KFD scheduler is active, it sends a request to stop the KFD scheduler and + * sets the KFD scheduler state to inactive. Then, it increments the reference + * count. + * + * The function is synchronized using the kfd_sch_mutex to ensure that the KFD + * scheduler state and reference count are updated atomically. + * + * Note: If the reference count is already zero when a request to enable the + * KFD scheduler is made, it means there's an imbalance bug somewhere. The + * function triggers a warning in this case. + */ +static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, + bool enable) +{ + mutex_lock(&adev->gfx.kfd_sch_mutex); + + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. 
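+		 * A typical cause is an unbalanced caller, e.g. an + * amdgpu_gfx_enforce_isolation_ring_end_use() without the matching + * amdgpu_gfx_enforce_isolation_ring_begin_use().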
+ */ + if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); + goto unlock; + } + + adev->gfx.kfd_sch_req_count[idx]--; + + if (adev->gfx.kfd_sch_req_count[idx] == 0 && + adev->gfx.kfd_sch_inactive[idx]) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); + } + } else { + if (adev->gfx.kfd_sch_req_count[idx] == 0) { + cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); + if (!adev->gfx.kfd_sch_inactive[idx]) { + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = true; + } + } + + adev->gfx.kfd_sch_req_count[idx]++; + } + +unlock: + mutex_unlock(&adev->gfx.kfd_sch_mutex); +} + +/** + * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation + * + * @work: work_struct. + * + * This function is the work handler for enforcing shader isolation on AMD GPUs. + * It counts the number of emitted fences for each GFX and compute ring. If there + * are any fences, it schedules the `enforce_isolation_work` to be run after a + * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion + * Driver (KFD) to resume the runqueue. The function is synchronized using the + * `enforce_isolation_mutex`. + */ +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) +{ + struct amdgpu_isolation_work *isolation_work = + container_of(work, struct amdgpu_isolation_work, work.work); + struct amdgpu_device *adev = isolation_work->adev; + u32 i, idx, fences = 0; + + if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = isolation_work->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { + if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + } + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { + if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + } + if (fences) { + /* we've already had our timeslice, so let's wrap this up */ + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + msecs_to_jiffies(1)); + } else { + /* Tell KFD to resume the runqueue */ + if (adev->kfd.init_complete) { + WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +static void +amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, + u32 idx) +{ + unsigned long cjiffies; + bool wait = false; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + /* set the initial values if nothing is set */ + if (!adev->gfx.enforce_isolation_jiffies[idx]) { + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + /* Make sure KFD gets a chance to run */ + if (amdgpu_amdkfd_compute_active(adev, idx)) { + cjiffies = jiffies; + if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) { + cjiffies -= adev->gfx.enforce_isolation_jiffies[idx]; + if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) { + /* if our time is up, let KGD work drain before scheduling more */ + wait = true; + /* 
reset the timer period */ + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } else { + /* set the timer period to what's left in our time slice */ + adev->gfx.enforce_isolation_time[idx] = + GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies); + } + } else { + /* if jiffies wrap around we will just wait a little longer */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + } + } else { + /* if there is no KFD work, then set the full slice period */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); + + if (wait) + msleep(GFX_SLICE_PERIOD_MS); +} + +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + /* Don't submit more work until KFD has had some time */ + amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + } + mutex_unlock(&adev->enforce_isolation_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 86d3fa7eef904..fbc6240febf52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -34,6 +34,7 @@ #include "soc15.h" #include "amdgpu_ras.h" #include "amdgpu_ring_mux.h" +#include "amdgpu_xcp.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -138,6 +139,10 @@ struct kiq_pm4_funcs { void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, uint16_t pasid, uint32_t flush_type, bool all_hub); + void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring, + uint32_t queue_type, uint32_t me_id, + uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid); /* Packet sizes */ int set_resources_size; int map_queues_size; @@ -156,6 +161,15 @@ struct amdgpu_kiq { void *mqd_backup; }; +struct spm_funcs { + void (*start)(struct amdgpu_device *adev); + void (*stop)(struct amdgpu_device *adev); + void (*set_rdptr)(struct amdgpu_device *adev, u32 rptr); + void (*set_spm_perfmon_ring_buf)(struct amdgpu_device *adev, u64 gpu_rptr, u32 size); + /* Packet sizes */ + int set_spm_config_size; +}; + /* * GFX configurations */ @@ -240,6 +254,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { @@ -339,6 +359,12 @@ struct amdgpu_me { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 
}; +struct amdgpu_isolation_work { + struct amdgpu_device *adev; + u32 xcp_id; + struct delayed_work work; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -350,6 +376,7 @@ struct amdgpu_gfx { struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES]; struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES]; struct amdgpu_imu imu; + const struct spm_funcs *spmfuncs; bool rs64_enable; /* firmware format */ const struct firmware *me_fw; /* ME firmware */ uint32_t me_fw_version; @@ -393,6 +420,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src priv_inst_irq; struct amdgpu_irq_src bad_op_irq; struct amdgpu_irq_src cp_ecc_error_irq; + struct amdgpu_irq_src spm_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; struct sq_work sq_work; @@ -440,6 +468,23 @@ struct amdgpu_gfx { uint32_t *ip_dump_core; uint32_t *ip_dump_compute_queues; uint32_t *ip_dump_gfx_queues; + + struct mutex reset_sem_mutex; + + /* cleaner shader */ + struct amdgpu_bo *cleaner_shader_obj; + unsigned int cleaner_shader_size; + u64 cleaner_shader_gpu_addr; + void *cleaner_shader_cpu_ptr; + const void *cleaner_shader_ptr; + bool enable_cleaner_shader; + struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; + /* Mutex for synchronizing KFD scheduler operations */ + struct mutex kfd_sch_mutex; + u64 kfd_sch_req_count[MAX_XCP]; + bool kfd_sch_inactive[MAX_XCP]; + unsigned long enforce_isolation_jiffies[MAX_XCP]; + unsigned long enforce_isolation_time[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -508,8 +553,6 @@ bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); -void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, - int *me, int *pipe, int *queue); bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); @@ -541,6 +584,17 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, void *ras_error_status, void (*func)(struct amdgpu_device *adev, void *ras_error_status, int xcc_id)); +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size); +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr); +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 103a837ccc712..c7b44aeb671b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,8 +38,6 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b49b3650fd621..e3974cf40256b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -893,6 +894,27 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gmc->noretry = 1; else gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + + /* keep noretry overrides for ASICs where KFD tests otherwise fail */ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: + /* + * noretry = 0 will cause kfd page fault tests to fail + * for some ASICs, so default to 1 for those ASICs. + */ + if (amdgpu_noretry == -1) + gmc->noretry = 1; + else + gmc->noretry = amdgpu_noretry; + break; + default: + break; + } } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, @@ -1064,18 +1086,6 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo)); } -/** - * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address - * from CPU's view - * - * @adev: amdgpu_device pointer - * @bo: amdgpu buffer object - */ -uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) -{ - return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; -} - int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) { struct amdgpu_bo *vram_bo = NULL; @@ -1129,6 +1139,79 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) return ret; } +static const char *nps_desc[] = { + [AMDGPU_NPS1_PARTITION_MODE] = "NPS1", + [AMDGPU_NPS2_PARTITION_MODE] = "NPS2", + [AMDGPU_NPS3_PARTITION_MODE] = "NPS3", + [AMDGPU_NPS4_PARTITION_MODE] = "NPS4", + [AMDGPU_NPS6_PARTITION_MODE] = "NPS6", + [AMDGPU_NPS8_PARTITION_MODE] = "NPS8", +}; + +static ssize_t available_memory_partition_show(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int size = 0, mode; + char *sep = ""; + + for_each_inst(mode, adev->gmc.supported_nps_modes) { + size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]); + sep = ", "; + } + size += sysfs_emit_at(buf, size, "\n"); + + return size; +} + +static ssize_t current_memory_partition_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_memory_partition mode; + struct amdgpu_hive_info *hive; + int i; + + mode = UNKNOWN_MEMORY_PARTITION_MODE; + for_each_inst(i, adev->gmc.supported_nps_modes) { + if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) { + mode = i; + break; + } + } + + if (mode == UNKNOWN_MEMORY_PARTITION_MODE) + return -EINVAL; + + if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) { + dev_info( + adev->dev, + "requested NPS mode is the same as current NPS mode, skipping\n"); + return count; + } + + /* If device is part of hive, all devices in the hive should request the + * same mode. 
Hence store the requested mode in hive. + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + atomic_set(&hive->requested_nps_mode, mode); + amdgpu_put_xgmi_hive(hive); + } else { + adev->gmc.requested_nps_mode = mode; + } + + dev_info( + adev->dev, + "NPS mode change requested, please remove and reload the driver\n"); + + return count; +} + static ssize_t current_memory_partition_show( struct device *dev, struct device_attribute *addr, char *buf) { @@ -1137,53 +1220,65 @@ static ssize_t current_memory_partition_show( enum amdgpu_memory_partition mode; mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); - switch (mode) { - case AMDGPU_NPS1_PARTITION_MODE: - return sysfs_emit(buf, "NPS1\n"); - case AMDGPU_NPS2_PARTITION_MODE: - return sysfs_emit(buf, "NPS2\n"); - case AMDGPU_NPS3_PARTITION_MODE: - return sysfs_emit(buf, "NPS3\n"); - case AMDGPU_NPS4_PARTITION_MODE: - return sysfs_emit(buf, "NPS4\n"); - case AMDGPU_NPS6_PARTITION_MODE: - return sysfs_emit(buf, "NPS6\n"); - case AMDGPU_NPS8_PARTITION_MODE: - return sysfs_emit(buf, "NPS8\n"); - default: + if ((mode >= ARRAY_SIZE(nps_desc)) || + (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode)) return sysfs_emit(buf, "UNKNOWN\n"); - } + + return sysfs_emit(buf, "%s\n", nps_desc[mode]); } -static DEVICE_ATTR_RO(current_memory_partition); +static DEVICE_ATTR_RW(current_memory_partition); +static DEVICE_ATTR_RO(available_memory_partition); int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev) { + bool nps_switch_support; + int r = 0; + if (!adev->gmc.gmc_funcs->query_mem_partition_mode) return 0; + nps_switch_support = (hweight32(adev->gmc.supported_nps_modes & + AMDGPU_ALL_NPS_MASK) > 1); + if (!nps_switch_support) + dev_attr_current_memory_partition.attr.mode &= + ~(S_IWUSR | S_IWGRP | S_IWOTH); + else + r = device_create_file(adev->dev, + &dev_attr_available_memory_partition); + + if (r) + return r; + return device_create_file(adev->dev, &dev_attr_current_memory_partition); } void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) { + if (!adev->gmc.gmc_funcs->query_mem_partition_mode) + return; + device_remove_file(adev->dev, &dev_attr_current_memory_partition); + device_remove_file(adev->dev, &dev_attr_available_memory_partition); } int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, struct amdgpu_mem_partition_info *mem_ranges, - int exp_ranges) + uint8_t *exp_ranges) { struct amdgpu_gmc_memrange *ranges; int range_cnt, ret, i, j; uint32_t nps_type; + bool refresh; - if (!mem_ranges) + if (!mem_ranges || !exp_ranges) return -EINVAL; + refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) && + (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS); ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges, - &range_cnt); + &range_cnt, refresh); if (ret) return ret; @@ -1191,16 +1286,16 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, /* TODO: For now, expect ranges and partition count to be the same. * Adjust if there are holes expected in any NPS domain. 
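+ * A zero *exp_ranges means the caller wants the discovered range count returned through the pointer.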
*/ - if (range_cnt != exp_ranges) { + if (*exp_ranges && (range_cnt != *exp_ranges)) { dev_warn( adev->dev, "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d", - exp_ranges, nps_type, range_cnt); + *exp_ranges, nps_type, range_cnt); ret = -EINVAL; goto err; } - for (i = 0; i < exp_ranges; ++i) { + for (i = 0; i < range_cnt; ++i) { if (ranges[i].base_address >= ranges[i].limit_address) { dev_warn( adev->dev, @@ -1241,8 +1336,81 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, ranges[i].limit_address - ranges[i].base_address + 1; } + if (!*exp_ranges) + *exp_ranges = range_cnt; err: kfree(ranges); return ret; } + +int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev, + int nps_mode) +{ + /* Not supported on VF devices and APUs */ + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return -EOPNOTSUPP; + + if (!adev->psp.funcs) { + dev_err(adev->dev, + "PSP interface not available for nps mode change request"); + return -EINVAL; + } + + return psp_memory_partition(&adev->psp, nps_mode); +} + +static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev, + int req_nps_mode, + int cur_nps_mode) +{ + return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) == + BIT(req_nps_mode)) && + req_nps_mode != cur_nps_mode); +} + +void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev) +{ + int req_nps_mode, cur_nps_mode, r; + struct amdgpu_hive_info *hive; + + if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes || + !adev->gmc.gmc_funcs->request_mem_partition_mode) + return; + + cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + req_nps_mode = atomic_read(&hive->requested_nps_mode); + if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, + cur_nps_mode)) { + amdgpu_put_xgmi_hive(hive); + return; + } + r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode); + amdgpu_put_xgmi_hive(hive); + goto out; + } + + req_nps_mode = adev->gmc.requested_nps_mode; + if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode)) + return; + + /* even if this fails, we should let driver unload w/o blocking */ + r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode); +out: + if (r) + dev_err(adev->dev, "NPS mode change request failed\n"); + else + dev_info( + adev->dev, + "NPS mode change request done, reload driver to complete the change\n"); +} + +bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev) +{ + if (adev->gmc.gmc_funcs->need_reset_on_init) + return adev->gmc.gmc_funcs->need_reset_on_init(adev); + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 4d951a1baefab..459a30fe239f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -73,6 +73,13 @@ enum amdgpu_memory_partition { AMDGPU_NPS8_PARTITION_MODE = 8, }; +#define AMDGPU_ALL_NPS_MASK \ + (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \ + BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \ + BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE)) + +#define AMDGPU_GMC_INIT_RESET_NPS BIT(0) + /* * GMC page fault information */ @@ -161,6 +168,10 @@ struct amdgpu_gmc_funcs { enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); + /* Request NPS mode */ + int (*request_mem_partition_mode)(struct amdgpu_device *adev, + int nps_mode); + bool 
(*need_reset_on_init)(struct amdgpu_device *adev); }; struct amdgpu_xgmi_ras { @@ -182,7 +193,6 @@ struct amdgpu_xgmi { bool supported; struct ras_common_if *ras_if; bool connected_to_cpu; - bool pending_reset; struct amdgpu_xgmi_ras *ras; }; @@ -305,6 +315,9 @@ struct amdgpu_gmc { struct amdgpu_mem_partition_info *mem_partitions; uint8_t num_mem_partitions; const struct amdgpu_gmc_funcs *gmc_funcs; + enum amdgpu_memory_partition requested_nps_mode; + uint32_t supported_nps_modes; + uint32_t reset_flags; struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; @@ -447,13 +460,17 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev); void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev); uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); -uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, struct amdgpu_mem_partition_info *mem_ranges, - int exp_ranges); + uint8_t *exp_ranges); + +int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev, + int nps_mode); +void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev); +bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 0760e70402ec1..071241ccfb646 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -76,6 +76,7 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, amdgpu_mem_info_gtt_used_show, NULL); +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS static struct attribute *amdgpu_gtt_mgr_attributes[] = { &dev_attr_mem_info_gtt_total.attr, &dev_attr_mem_info_gtt_used.attr, @@ -85,6 +86,7 @@ static struct attribute *amdgpu_gtt_mgr_attributes[] = { const struct attribute_group amdgpu_gtt_mgr_attr_group = { .attrs = amdgpu_gtt_mgr_attributes }; +#endif /** * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space @@ -277,6 +279,9 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; struct ttm_resource_manager *man = &mgr->manager; uint64_t start, size; +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + int ret; +#endif man->use_tt = true; man->func = &amdgpu_gtt_mgr_func; @@ -288,6 +293,19 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); + return ret; + } +#endif + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); ttm_resource_manager_set_used(man, true); return 0; @@ -316,7 +334,10 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); - +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); + 
device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); +#endif ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index e36fede7f74c3..ca7304e7f45b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -51,8 +51,668 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_hmm.h" -#define MAX_WALK_BYTE (2UL << 30) +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @mn: MMU notifier structure + * @type: type of MMU notifier + * @work: destruction work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @read_lock: mutex for recursive locking of @lock + * @recursion: depth of recursion + * + * Data for each amdgpu device and process address space. + */ +struct amdgpu_mn { + /* constant after initialisation */ + struct amdgpu_device *adev; + struct mm_struct *mm; + struct mmu_notifier mn; + enum amdgpu_mn_type type; + + /* protected by adev->mn_lock */ + struct hlist_node node; + + /* objects protected by lock */ + struct rw_semaphore lock; +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + struct rb_root objects; +#else + struct rb_root_cached objects; +#endif + struct mutex read_lock; + atomic_t recursion; +#if !defined(HAVE_MMU_NOTIFIER_PUT) + struct rcu_head rcu; +#endif +}; + +/** + * struct amdgpu_mn_node + * + * @it: interval node defining start-last of the affected address range + * @bos: list of all BOs in the affected address range + * + * Manages all BOs which are affected of a certain range of address space. + */ +struct amdgpu_mn_node { + struct interval_tree_node it; + struct list_head bos; +}; + +#ifdef HAVE_MMU_NOTIFIER_PUT +static void amdgpu_mn_free(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct amdgpu_mn, mn)); +} +#else +static void amdgpu_mn_free(struct rcu_head *rcu) +{ + kfree(container_of(rcu, struct amdgpu_mn, rcu)); +} +#endif + +/** + * amdgpu_mn_destroy - destroy the MMU notifier + * + * @amn: our notifier + * + * Destroy the notifier + */ +static void amdgpu_mn_destroy(struct amdgpu_mn *amn) +{ + struct amdgpu_device *adev = amn->adev; + struct amdgpu_mn_node *node, *next_node; + struct amdgpu_bo *bo, *next_bo; + + mutex_lock(&adev->mn_lock); + down_write(&amn->lock); + hash_del(&amn->node); + rbtree_postorder_for_each_entry_safe(node, next_node, +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + &amn->objects, it.rb) { +#else + &amn->objects.rb_root, it.rb) { +#endif + list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { + bo->mn = NULL; + list_del_init(&bo->mn_list); + } + kfree(node); + } + +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + amn->objects = RB_ROOT; +#else + amn->objects = RB_ROOT_CACHED; +#endif + + up_write(&amn->lock); + +#ifdef HAVE_MMU_NOTIFIER_PUT + mmu_notifier_put(&amn->mn); +#else + mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + mmu_notifier_call_srcu(&amn->rcu, amdgpu_mn_free); +#endif + + mutex_unlock(&adev->mn_lock); +} + +/** + * amdgpu_mn_release - callback to notify about mm destruction + * + * @mn: our notifier + * @mm: the mm this callback is about + * + * Destroy our notifier. 
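+ * Called when the process address space goes away; all notifier state for this device/mm pair is torn down here.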
+ */ +static void amdgpu_mn_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + + amdgpu_mn_destroy(amn); +} + + +/** + * amdgpu_mn_lock - take the write side lock for this notifier + * + * @mn: our notifier + */ +void amdgpu_mn_lock(struct amdgpu_mn *mn) +{ + if (mn) + down_write(&mn->lock); +} +/** + * amdgpu_mn_unlock - drop the write side lock for this notifier + * + * @mn: our notifier + */ +void amdgpu_mn_unlock(struct amdgpu_mn *mn) +{ + if (mn) + up_write(&mn->lock); +} + +#if !defined(HAVE_5ARGS_INVALIDATE_RANGE_START) && !defined(HAVE_2ARGS_INVALIDATE_RANGE_START) +/** + * amdgpu_mn_read_lock - take the read side lock for this notifier + * + * @amn: our notifier + */ +static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) +{ + /* FIXME: Need figure out one way to detect + * if we are in oom reaper context. + */ + mutex_lock(&amn->read_lock); + if (atomic_inc_return(&amn->recursion) == 1) + down_read_non_owner(&amn->lock); + mutex_unlock(&amn->read_lock); +} +#else +/** + * amdgpu_mn_read_lock - take the read side lock for this notifier + * + * @amn: our notifier + */ +static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) +{ + /* Non blockable occurs only in oom reaper context. + * In this case, process is going to be killed anyway. + * Let oom reaper fail at this stage. + */ + if (!blockable) + return -EAGAIN; + + mutex_lock(&amn->read_lock); + if (atomic_inc_return(&amn->recursion) == 1) + down_read_non_owner(&amn->lock); + mutex_unlock(&amn->read_lock); + + return 0; +} +#endif + +/** + * amdgpu_mn_read_unlock - drop the read side lock for this notifier + * + * @amn: our notifier + */ +static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) +{ + if (atomic_dec_return(&amn->recursion) == 0) + up_read_non_owner(&amn->lock); +} + +/** + * amdgpu_mn_invalidate_node - unmap all BOs of a node + * + * @node: the node with the BOs to unmap + * @start: start of address range affected + * @end: end of address range affected + * + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. + */ +static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, + unsigned long start, + unsigned long end) +{ + struct amdgpu_bo *bo; + long r; + unsigned long userptr; + + list_for_each_entry(bo, &node->bos, mn_list) { + + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end, &userptr)) + continue; + + r = dma_resv_wait_timeout(amdkcl_ttm_resvp(&bo->tbo), + true, false, MAX_SCHEDULE_TIMEOUT); + if (r <= 0) + DRM_ERROR("(%ld) failed to wait for user bo\n", r); + + amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); + } +} + +#if defined(HAVE_2ARGS_INVALIDATE_RANGE_START) +/** + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * + * @mn: our notifier + * @range: mmu notifier context + * + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. 
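+ * Returns 0 on success, or -EAGAIN if the notifier is not allowed to block.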
+ */ +static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + const struct mmu_notifier_range *range) +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + unsigned long end; + + /* notification is exclusive, but interval is inclusive */ + end = range->end - 1; + + /* TODO we should be able to split locking for interval tree and + * amdgpu_mn_invalidate_node + */ + if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + return -EAGAIN; + + it = interval_tree_iter_first(&amn->objects, range->start, end); + while (it) { + struct amdgpu_mn_node *node; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, range->start, end); + + amdgpu_mn_invalidate_node(node, range->start, end); + } + + return 0; +} + +/** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @range: mmu notifier context + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restored in amdgpu_mn_invalidate_range_end. + */ +static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + const struct mmu_notifier_range *range) +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + unsigned long end; + unsigned long userptr; + + /* notification is exclusive, but interval is inclusive */ + end = range->end - 1; + + if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + return -EAGAIN; + + it = interval_tree_iter_first(&amn->objects, range->start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, range->start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + range->start, + end, &userptr)) + amdgpu_amdkfd_evict_userptr(mem, range->mm); + } + } + + return 0; +} + +#else + +/** + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty.
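+ * + * Pairing sketch (assumed from the code, not stated by this patch): + * + *	invalidate_range_start_hsa()  evicts the user-mode queues via + *	                              amdgpu_amdkfd_evict_userptr() + *	invalidate_range_end()        drops the read lock; the KFD + *	                              restore worker revalidates the BOs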
+ */ +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) +static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end, + bool blockable) +#else +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +#endif +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) + if (amdgpu_mn_read_lock(amn, blockable)) + return -EAGAIN; +#else + amdgpu_mn_read_lock(amn); +#endif + + it = interval_tree_iter_first(&amn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + amdgpu_mn_invalidate_node(node, start, end); + } + +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) + return 0; +#endif +} + + +/** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restored in amdgpu_mn_invalidate_range_end. + */ +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) +static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end, + bool blockable) +#else +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +#endif +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + unsigned long userptr; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) + if (amdgpu_mn_read_lock(amn, blockable)) + return -EAGAIN; +#else + amdgpu_mn_read_lock(amn); +#endif + + it = interval_tree_iter_first(&amn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end, &userptr)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } + +#if defined(HAVE_5ARGS_INVALIDATE_RANGE_START) + return 0; +#endif +} + +#endif + +/** + * amdgpu_mn_invalidate_range_end - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * Release the lock again to allow new command submissions.
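+ * + * Nesting sketch (illustrative): the recursion counter lets start/end + * callbacks nest so that only the outermost pair touches amn->lock: + * + *	start  recursion 0 -> 1  down_read_non_owner(&amn->lock) + *	 start recursion 1 -> 2  lock already held + *	 end   recursion 2 -> 1 + *	end    recursion 1 -> 0  up_read_non_owner(&amn->lock)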
+ */ +static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, +#ifdef HAVE_2ARGS_INVALIDATE_RANGE_START + const struct mmu_notifier_range *range) +#else + struct mm_struct *mm, + unsigned long start, + unsigned long end) +#endif +{ + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + + amdgpu_mn_read_unlock(amn); +} + +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { +#ifdef HAVE_MMU_NOTIFIER_PUT + .free_notifier = amdgpu_mn_free, +#endif + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { +#ifdef HAVE_MMU_NOTIFIER_PUT + .free_notifier = amdgpu_mn_free, +#endif + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, +}; + +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. + */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + +/** + * amdgpu_mn_get - create notifier context + * + * @adev: amdgpu device pointer + * @type: type of MMU notifier context + * + * Creates a notifier context for current->mm. + */ +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) +{ + struct mm_struct *mm = current->mm; + struct amdgpu_mn *amn; + unsigned long key = AMDGPU_MN_KEY(mm, type); + int r; + + mutex_lock(&adev->mn_lock); +#ifndef HAVE_DOWN_WRITE_KILLABLE + down_write(&mm->mmap_sem); +#else + if (down_write_killable(&mm->mmap_sem)) { + mutex_unlock(&adev->mn_lock); + return ERR_PTR(-EINTR); + } +#endif + + hash_for_each_possible(adev->mn_hash, amn, node, key) + if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) + goto release_locks; + + amn = kzalloc(sizeof(*amn), GFP_KERNEL); + if (!amn) { + amn = ERR_PTR(-ENOMEM); + goto release_locks; + } + + amn->adev = adev; + amn->mm = mm; + init_rwsem(&amn->lock); + amn->type = type; + amn->mn.ops = &amdgpu_mn_ops[type]; +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + amn->objects = RB_ROOT; +#else + amn->objects = RB_ROOT_CACHED; +#endif + mutex_init(&amn->read_lock); + atomic_set(&amn->recursion, 0); + + r = __mmu_notifier_register(&amn->mn, mm); + if (r) + goto free_amn; + + hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); + +release_locks: + up_write(&mm->mmap_sem); + mutex_unlock(&adev->mn_lock); + + return amn; + +free_amn: + up_write(&mm->mmap_sem); + mutex_unlock(&adev->mn_lock); + kfree(amn); + + return ERR_PTR(r); +} + +/** + * amdgpu_hmm_register - register a BO for notifier updates + * + * @bo: amdgpu buffer object + * @addr: userptr addr we should monitor + * + * Registers an MMU notifier for the given BO at the specified address. + * Returns 0 on success, -ERRNO if anything goes wrong. + */ +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) +{ + unsigned long end = addr + amdgpu_bo_size(bo) - 1; + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + enum amdgpu_mn_type type = + bo->kfd_bo ? 
AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; + struct amdgpu_mn *amn; + struct amdgpu_mn_node *node = NULL, *new_node; + struct list_head bos; + struct interval_tree_node *it; + + amn = amdgpu_mn_get(adev, type); + if (IS_ERR(amn)) + return PTR_ERR(amn); + + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + INIT_LIST_HEAD(&bos); + + down_write(&amn->lock); + + while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { + kfree(node); + node = container_of(it, struct amdgpu_mn_node, it); + interval_tree_remove(&node->it, &amn->objects); + addr = min(it->start, addr); + end = max(it->last, end); + list_splice(&node->bos, &bos); + } + + if (!node) + node = new_node; + else + kfree(new_node); + + bo->mn = amn; + + node->it.start = addr; + node->it.last = end; + INIT_LIST_HEAD(&node->bos); + list_splice(&bos, &node->bos); + list_add(&bo->mn_list, &node->bos); + + interval_tree_insert(&node->it, &amn->objects); + + up_write(&amn->lock); + + return 0; +} + +/** + * amdgpu_hmm_unregister - unregister a BO for notifier updates + * + * @bo: amdgpu buffer object + * + * Remove any registration of MMU notifier updates from the buffer object. + */ +void amdgpu_hmm_unregister(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_mn *amn; + struct list_head *head; + + mutex_lock(&adev->mn_lock); + + amn = bo->mn; + if (amn == NULL) { + mutex_unlock(&adev->mn_lock); + return; + } + + down_write(&amn->lock); + + /* save the next list entry for later */ + head = bo->mn_list.next; + + bo->mn = NULL; + list_del_init(&bo->mn_list); + + if (list_empty(head)) { + struct amdgpu_mn_node *node; + + node = container_of(head, struct amdgpu_mn_node, bos); + interval_tree_remove(&node->it, &amn->objects); + kfree(node); + } + + up_write(&amn->lock); + mutex_unlock(&adev->mn_lock); +} + +#else /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ + +#define MAX_WALK_BYTE (2UL << 30) /** * amdgpu_hmm_invalidate_gfx - callback to notify about mm change * @@ -78,7 +738,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + r = dma_resv_wait_timeout(amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) @@ -165,6 +825,21 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) bo->notifier.mm = NULL; } +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; +#endif + int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, struct page **pages, @@ -188,10 +863,20 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, } hmm_range->notifier = notifier; +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + hmm_range->flags = hmm_range_flags; + hmm_range->values = hmm_range_values; + hmm_range->pfn_shift = PAGE_SHIFT; + hmm_range->default_flags = hmm_range_flags[HMM_PFN_VALID]; + if (!readonly) + hmm_range->default_flags |= hmm_range->flags[HMM_PFN_WRITE]; + hmm_range->pfns = (uint64_t 
*)pfns; +#else hmm_range->default_flags = HMM_PFN_REQ_FAULT; if (!readonly) hmm_range->default_flags |= HMM_PFN_REQ_WRITE; hmm_range->hmm_pfns = pfns; +#endif hmm_range->start = start; end = start + npages * PAGE_SIZE; hmm_range->dev_private_owner = owner; @@ -207,8 +892,16 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + if (unlikely(r <= 0)) { +#else if (unlikely(r)) { +#endif +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) +#else if (r == -EBUSY && !time_after(jiffies, timeout)) +#endif goto retry; goto out_free_pfns; } @@ -227,8 +920,20 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, * hmm_range_fault() fails. FIXME: The pages cannot be touched outside * the notifier_lock, and mmu_interval_read_retry() must be done first. */ - for (i = 0; pages && i < npages; i++) + for (i = 0; pages && i < npages; i++) { +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + pages[i] = hmm_device_entry_to_page(hmm_range, pfns[i]); + if (unlikely(!pages[i])) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, pfns[i]); + r = -ENOMEM; + + goto out_free_pfns; + } +#else pages[i] = hmm_pfn_to_page(pfns[i]); +#endif + } *phmm_range = hmm_range; @@ -250,8 +955,13 @@ bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) r = mmu_interval_read_retry(hmm_range->notifier, hmm_range->notifier_seq); +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + kvfree(hmm_range->pfns); +#else kvfree(hmm_range->hmm_pfns); +#endif kfree(hmm_range); return r; } +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index e2edcd010cccb..38492d5c4d72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -24,6 +24,41 @@ #ifndef __AMDGPU_MN_H__ #define __AMDGPU_MN_H__ +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED +#include +#include +/* + * MMU Notifier + */ +struct amdgpu_mn; + +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + +#if defined(CONFIG_MMU_NOTIFIER) +void amdgpu_mn_lock(struct amdgpu_mn *mn); +void amdgpu_mn_unlock(struct amdgpu_mn *mn); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); +void amdgpu_hmm_unregister(struct amdgpu_bo *bo); +#else /* !CONFIG_MMU_NOTIFIER */ +static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} +static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) +{ + return NULL; +} +static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) +{ + return -ENODEV; +} +static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {} +#endif /* CONFIG_MMU_NOTIFIER */ +#else /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ #include #include #include @@ -31,6 +66,23 @@ #include #include +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +extern const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX]; +extern const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX]; + +static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn) +{ + struct hmm_range hmm_range = { + .flags = hmm_range_flags, + .values = 
hmm_range_values, + .pfn_shift = PAGE_SHIFT, + }; + + return hmm_device_entry_to_page(&hmm_range, hmm_pfn); +} +#endif + int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, void *owner, struct page **pages, @@ -49,5 +101,6 @@ static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) } static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {} #endif +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 00d6211e0fbf9..f0765ccde6680 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -225,15 +225,6 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) kfree(i2c); } -/* Add the default buses */ -void amdgpu_i2c_init(struct amdgpu_device *adev) -{ - if (amdgpu_hw_i2c) - DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n"); - - amdgpu_atombios_i2c_init(adev); -} - /* remove all the buses */ void amdgpu_i2c_fini(struct amdgpu_device *adev) { @@ -247,22 +238,6 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev) } } -/* Add additional buses */ -void amdgpu_i2c_add(struct amdgpu_device *adev, - const struct amdgpu_i2c_bus_rec *rec, - const char *name) -{ - struct drm_device *dev = adev_to_drm(adev); - int i; - - for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) { - if (!adev->i2c_bus[i]) { - adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name); - return; - } - } -} - /* looks up bus based on id */ struct amdgpu_i2c_chan * amdgpu_i2c_lookup(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h index 63c2ff7499e17..21e3d1dad0a12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h @@ -28,11 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, const struct amdgpu_i2c_bus_rec *rec, const char *name); void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); -void amdgpu_i2c_init(struct amdgpu_device *adev); void amdgpu_i2c_fini(struct amdgpu_device *adev); -void amdgpu_i2c_add(struct amdgpu_device *adev, - const struct amdgpu_i2c_bus_rec *rec, - const char *name); struct amdgpu_i2c_chan * amdgpu_i2c_lookup(struct amdgpu_device *adev, const struct amdgpu_i2c_bus_rec *i2c_bus); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8b512dc28df83..f57e97e6eed68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,6 +143,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, unsigned int i; int r = 0; + unsigned extra_nop = 0; if (num_ibs == 0) return -EINVAL; @@ -184,6 +185,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, alloc_size = ring->funcs->emit_frame_size + num_ibs * ring->funcs->emit_ib_size; + if (job && !job->vm_needs_flush && ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + extra_nop = 128; + alloc_size += extra_nop; + } + r = amdgpu_ring_alloc(ring, alloc_size); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); @@ -214,6 +220,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->funcs->insert_start(ring); if (job) { + amdgpu_ring_insert_nop(ring, extra_nop); /* keep CE from running ahead of DE */ + r = amdgpu_vm_flush(ring, job, need_pipe_sync); if (r) { amdgpu_ring_undo(ring); diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b6a8bddada4c3..92d27d32de41b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -476,15 +476,19 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); + (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? + vm->root.bo->xcp_id : 0] && + AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, @@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } } /* alloc a default reserved vmid to enforce isolation */ - if (enforce_isolation) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa67512602..4012fb2dd08a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 19ce4da285e8d..0e890f2785b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -161,7 +161,10 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) * Returns: * result of handling the IRQ, as defined by &irqreturn_t */ -static irqreturn_t amdgpu_irq_handler(int irq, void *arg) +#ifndef CONFIG_DRM_LEGACY +static +#endif +irqreturn_t amdgpu_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; struct amdgpu_device *adev = drm_to_adev(dev); @@ -470,7 +473,6 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, (client_id == SOC15_IH_CLIENTID_ISP)) && adev->irq.virq[src_id]) { generic_handle_domain_irq(adev->irq.domain, src_id); - } else if (!adev->irq.client[client_id].sources) { DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", client_id, src_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 04c0b4fa17a4e..aef5c216b1911 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -121,6 +121,9 @@ enum interrupt_node_id_per_aid { extern const int node_id_to_phys_map[NODEID_MAX]; void amdgpu_irq_disable_all(struct amdgpu_device *adev); +#ifdef CONFIG_DRM_LEGACY +irqreturn_t amdgpu_irq_handler(int irq, void *arg); +#endif int amdgpu_irq_init(struct amdgpu_device *adev); void amdgpu_irq_fini_sw(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 4766e99dd98fb..478086f166507 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -33,12 +33,12 @@ #include "isp_v4_1_0.h" #include "isp_v4_1_1.h" -static int isp_sw_init(void *handle) +static int isp_sw_init(struct amdgpu_ip_block *ip_block) { return 0; } -static int isp_sw_fini(void *handle) +static int isp_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } @@ -49,17 +49,11 @@ static int isp_sw_fini(void *handle) * @handle: handle for amdgpu_device pointer * */ -static int isp_hw_init(void *handle) +static int isp_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_isp *isp = &adev->isp; - const struct amdgpu_ip_block *ip_block = - amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ISP); - - if (!ip_block) - return -EINVAL; - if (isp->funcs->hw_init != NULL) return isp->funcs->hw_init(isp); @@ -72,10 +66,9 @@ static int isp_hw_init(void *handle) * @handle: handle for amdgpu_device pointer * */ -static int isp_hw_fini(void *handle) +static int isp_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_isp *isp = &adev->isp; + struct amdgpu_isp *isp = &ip_block->adev->isp; if (isp->funcs->hw_fini != NULL) return isp->funcs->hw_fini(isp); @@ -83,12 +76,12 @@ static int isp_hw_fini(void *handle) return -ENODEV; } -static int isp_suspend(void *handle) +static int isp_suspend(struct amdgpu_ip_block *ip_block) { return 0; } -static int isp_resume(void *handle) +static int isp_resume(struct amdgpu_ip_block *ip_block) { return 0; } @@ -122,9 +115,10 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev) return r; } -static int isp_early_init(void *handle) +static int isp_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_isp *isp = &adev->isp; switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) { @@ -154,12 +148,12 @@ static bool isp_is_idle(void *handle) return true; } -static int isp_wait_for_idle(void *handle) +static int isp_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int isp_soft_reset(void *handle) +static int isp_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 908e134551523..fdadbe49c913e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -30,6 +30,60 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" +#include "amdgpu_dev_coredump.h" +#include "amdgpu_xgmi.h" + +static void amdgpu_job_do_core_dump(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int i; + + dev_info(adev->dev, "Dumping IP State\n"); + for (i = 0; i < adev->num_ip_blocks; i++) + if (adev->ip_blocks[i].version->funcs->dump_ip_state) + adev->ip_blocks[i].version->funcs 
+ ->dump_ip_state((void *)&adev->ip_blocks[i]); + dev_info(adev->dev, "Dumping IP State Completed\n"); + + amdgpu_coredump(adev, true, false, job); +} + +static void amdgpu_job_core_dump(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_hive_info *hive = NULL; + + if (!amdgpu_sriov_vf(adev)) + hive = amdgpu_get_xgmi_hive(adev); + if (hive) + mutex_lock(&hive->hive_lock); + /* + * Reuse the logic in amdgpu_device_gpu_recover() to build the list of + * devices for the coredump + */ + INIT_LIST_HEAD(&device_list); + if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + if (!list_is_first(&adev->reset_list, &device_list)) + list_rotate_to_front(&adev->reset_list, &device_list); + device_list_handle = &device_list; + } else { + list_add_tail(&adev->reset_list, &device_list); + device_list_handle = &device_list; + } + + /* Do the coredump for each device */ + list_for_each_entry(tmp_adev, device_list_handle, reset_list) + amdgpu_job_do_core_dump(tmp_adev, job); + + if (hive) { + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + } +} static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { @@ -48,9 +102,17 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - adev->job_hang = true; + /* + * Do the coredump immediately after a job timeout to get a very + * close dump/snapshot/representation of the GPU's current error status. + * Skip it for SRIOV, since VF FLR will be triggered by the host driver + * before the job timeout. + */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_job_core_dump(adev, job); + if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { dev_err(adev->dev, "ring %s timeout, but soft recovered\n", @@ -72,6 +134,26 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per-ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset.
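+ * + * Escalation order in this handler (editor's summary): + * + *	1) amdgpu_ring_soft_recovery() - nudge the offending fence + *	2) amdgpu_ring_reset()         - reset just this ring, below + *	3) amdgpu_device_gpu_recover() - full adapter reset as last resort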
+ */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched, true); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); @@ -81,6 +163,12 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.src = AMDGPU_RESET_SRC_JOB; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /* + * Skip an unnecessary extra coredump here, as we have already + * captured a very close representation of the GPU's error status. + */ + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) dev_err(adev->dev, "GPU Recovery Failed: %d\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a963a25ddd620..ce6b9ba967fff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + /* enforce isolation */ + bool enforce_isolation; + uint32_t num_ibs; struct amdgpu_ib ibs[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 6df99cb00d9a5..0a6397e3b8a74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -140,7 +140,7 @@ void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring) { atomic_dec(&ring->adev->jpeg.total_submission_cnt); - schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); + mod_delayed_work(system_wq, &ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); } int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) @@ -342,3 +342,82 @@ int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, return psp_execute_ip_fw_load(&adev->psp, &ucode); } + +/* + * debugfs to enable/disable jpeg job submission to a specific core.
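+ * + * Bit layout example (illustrative; the debugfs path is an assumption + * based on the file created below): ring j of instance i maps to bit + * (i * num_jpeg_rings + j), so with 2 instances of 2 rings each + * + *	echo 0x5 > /sys/kernel/debug/dri/0/amdgpu_jpeg_sched_mask + * + * (bits 0 and 2) leaves only ring 0 of each instance schedulable.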
+ */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i, j; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (val & (1 << ((i * adev->jpeg.num_jpeg_rings) + j))) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i, j; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->sched.ready) + mask |= 1 << ((i * adev->jpeg.num_jpeg_rings) + j); + } + } + *val = mask; + return 0; +} + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops, + amdgpu_debugfs_jpeg_sched_mask_get, + amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n"); +#else +DEFINE_SIMPLE_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops, + amdgpu_debugfs_jpeg_sched_mask_get, + amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n"); +#endif + +#endif + +void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->jpeg.num_jpeg_inst > 1) && !(adev->jpeg.num_jpeg_rings > 1)) + return; + sprintf(name, "amdgpu_jpeg_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_jpeg_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index f9cdd873ac9b0..819dc7a0af99e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -149,5 +149,6 @@ int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, enum AMDGPU_UCODE_ID ucode_id); +void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96af9ff1acb67..5ffe1dad96227 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -581,6 +581,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; switch (info->query) { + case AMDGPU_INFO_VIRTUAL_RANGE: { + struct drm_amdgpu_virtual_range range_info; + + switch (info->virtual_range.aperture) { + case AMDGPU_SUA_APERTURE_PRIVATE: + range_info.start = adev->gmc.private_aperture_start; + range_info.end = adev->gmc.private_aperture_end; + break; + case AMDGPU_SUA_APERTURE_SHARED: + range_info.start = adev->gmc.shared_aperture_start; + range_info.end = adev->gmc.shared_aperture_end; + break; + default: + return -EINVAL; + } + return copy_to_user(out, &range_info, + min((size_t)size, sizeof(range_info))) ? 
-EFAULT : 0; + } case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; @@ -1056,6 +1074,21 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } } + case AMDGPU_INFO_CAPABILITY: { + struct drm_amdgpu_capability cap; + + memset(&cap, 0, sizeof(cap)); + if (amdgpu_no_evict) + cap.flag |= AMDGPU_CAPABILITY_PIN_MEM_FLAG; + + if (amdgpu_direct_gma_size) { + cap.flag |= AMDGPU_CAPABILITY_DIRECT_GMA_FLAG; + cap.direct_gma_size = amdgpu_direct_gma_size; + } + + return copy_to_user(out, &cap, + min((size_t)size, sizeof(cap))) ? -EFAULT : 0; + } case AMDGPU_INFO_SENSOR: { if (!adev->pm.dpm_enabled) return -ENOENT; @@ -1261,6 +1294,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return copy_to_user(out, max_ibs, min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0; } + case AMDGPU_INFO_GPUVM_FAULT: { struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; @@ -1272,15 +1306,24 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) memset(&gpuvm_fault, 0, sizeof(gpuvm_fault)); +#ifdef HAVE_STRUCT_XARRAY xa_lock_irqsave(&adev->vm_manager.pasids, flags); +#else + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); +#endif gpuvm_fault.addr = vm->fault_info.addr; gpuvm_fault.status = vm->fault_info.status; gpuvm_fault.vmhub = vm->fault_info.vmhub; +#ifdef HAVE_STRUCT_XARRAY xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +#else + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); +#endif return copy_to_user(out, &gpuvm_fault, min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; } + default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1380,6 +1423,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + spin_lock_init(&fpriv->sem_handles_lock); + idr_init(&fpriv->sem_handles); amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); @@ -1419,6 +1464,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = file_priv->driver_priv; struct amdgpu_bo_list *list; + struct amdgpu_sem *sem; struct amdgpu_bo *pd; u32 pasid; int handle; @@ -1454,7 +1500,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) - amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); + amdgpu_pasid_free_delayed(amdkcl_ttm_resvp(&pd->tbo), pasid); amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) @@ -1463,6 +1509,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, idr_destroy(&fpriv->bo_list_handles); mutex_destroy(&fpriv->bo_list_lock); + idr_for_each_entry(&fpriv->sem_handles, sem, handle) + amdgpu_sem_destroy(fpriv, handle); + idr_destroy(&fpriv->sem_handles); + kfree(fpriv); file_priv->driver_priv = NULL; @@ -1470,13 +1520,16 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_put_autosuspend(dev->dev); } - void amdgpu_driver_release_kms(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); amdgpu_device_fini_sw(adev); pci_set_drvdata(adev->pdev, NULL); +#ifndef HAVE_DRM_DRM_MANAGED_H + drm_dev_fini(dev); + kfree(adev); +#endif } /* @@ -1493,7 +1546,7 @@ void amdgpu_driver_release_kms(struct drm_device *dev) u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc) { struct drm_device 
*dev = crtc->dev; - unsigned int pipe = crtc->index; + unsigned int pipe = drm_crtc_index(crtc); struct amdgpu_device *adev = drm_to_adev(dev); int vpos, hpos, stat; u32 count; @@ -1561,7 +1614,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc) int amdgpu_enable_vblank_kms(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - unsigned int pipe = crtc->index; + unsigned int pipe = drm_crtc_index(crtc); struct amdgpu_device *adev = drm_to_adev(dev); int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); @@ -1578,7 +1631,7 @@ int amdgpu_enable_vblank_kms(struct drm_crtc *crtc) void amdgpu_disable_vblank_kms(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - unsigned int pipe = crtc->index; + unsigned int pipe = drm_crtc_index(crtc); struct amdgpu_device *adev = drm_to_adev(dev); int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 18ee60378727f..7f62c72a69653 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -594,7 +594,11 @@ static const struct file_operations mca_ue_dump_debug_fops = { .release = single_release, }; +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n"); +#else +DEFINE_SIMPLE_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n"); +#endif #endif void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index c598c3edff7ee..a072cdd316810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -192,17 +192,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev) (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); - if (r) { - dev_err(adev->dev, - "(%d) read_val_offs alloc failed\n", r); - goto error; - } - adev->mes.read_val_gpu_addr = - adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); - adev->mes.read_val_ptr = - (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs]; - r = amdgpu_mes_doorbell_init(adev); if (r) goto error; @@ -223,8 +212,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev) amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs[i]); } - if (adev->mes.read_val_ptr) - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -249,8 +236,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs[i]); } - if (adev->mes.read_val_ptr) - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); amdgpu_mes_doorbell_free(adev); @@ -501,60 +486,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) int amdgpu_mes_suspend(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_suspend_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); + input.suspend_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. 
*/ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to suspend pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to suspend all gangs"); + + return r; } int amdgpu_mes_resume(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_resume_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); + input.resume_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->resume_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to resume pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to resume all gangs"); + + return r; } static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -793,6 +768,68 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) +{ + unsigned long flags; + struct amdgpu_mes_queue *queue; + struct amdgpu_mes_gang *gang; + struct mes_reset_queue_input queue_input; + int r; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. 
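+ * + * Illustrative only: this helper resets a MES user queue by doorbell + * offset, while amdgpu_mes_reset_hw_queue_mmio() further down is the + * register-level variant, e.g. (assumed usage, not from this patch): + * + *	amdgpu_mes_reset_hw_queue_mmio(adev, AMDGPU_RING_TYPE_COMPUTE, + *				       me_id, pipe_id, queue_id, vmid);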
+ */ + amdgpu_mes_lock(&adev->mes); + + /* look up the queue in the idr list */ + spin_lock_irqsave(&adev->mes.queue_id_lock, flags); + + queue = idr_find(&adev->mes.queue_id_idr, queue_id); + if (!queue) { + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + amdgpu_mes_unlock(&adev->mes); + DRM_ERROR("queue id %d doesn't exist\n", queue_id); + return -EINVAL; + } + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + + DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", + queue->doorbell_off); + + gang = queue->gang; + queue_input.doorbell_offset = queue->doorbell_off; + queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; + + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue, queue id = %d\n", + queue_id); + + amdgpu_mes_unlock(&adev->mes); + + return 0; +} + +int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, + int me_id, int pipe_id, int queue_id, int vmid) +{ + struct mes_reset_queue_input queue_input; + int r; + + queue_input.queue_type = queue_type; + queue_input.use_mmio = true; + queue_input.me_id = me_id; + queue_input.pipe_id = pipe_id; + queue_input.queue_id = queue_id; + queue_input.vmid = vmid; + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n", + queue_id); + return r; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -840,7 +877,8 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + bool use_mmio) { struct mes_reset_legacy_queue_input queue_input; int r; @@ -849,11 +887,13 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; + queue_input.me_id = ring->me; queue_input.pipe_id = ring->pipe; queue_input.queue_id = ring->queue; - queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0; queue_input.wptr_addr = ring->wptr_gpu_addr; queue_input.vmid = vmid; + queue_input.use_mmio = use_mmio; r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); if (r) @@ -866,10 +906,19 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; int r, val = 0; + uint32_t addr_offset = 0; + uint64_t read_val_gpu_addr; + uint32_t *read_val_ptr; + if (amdgpu_device_wb_get(adev, &addr_offset)) { + DRM_ERROR("critical bug!
too many mes readers\n"); + goto error; + } + read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4); + read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset]; op_input.op = MES_MISC_OP_READ_REG; op_input.read_reg.reg_offset = reg; - op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; + op_input.read_reg.buffer_addr = read_val_gpu_addr; if (!adev->mes.funcs->misc_op) { DRM_ERROR("mes rreg is not supported!\n"); @@ -880,9 +929,11 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) if (r) DRM_ERROR("failed to read reg (0x%x)\n", reg); else - val = *(adev->mes.read_val_ptr); + val = *(read_val_ptr); error: + if (addr_offset) + amdgpu_device_wb_free(adev, addr_offset); return val; } @@ -1337,8 +1388,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, if (!amdgpu_vm_ready(vm)) goto out_unlock; - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); + r = dma_resv_get_singleton(amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto out_unlock; if (fence) { @@ -1608,6 +1658,19 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) return r; } +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) +{ + uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + bool is_supported = false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) + is_supported = true; + + return is_supported; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 548e724e3a750..5edc3fbbbb18f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + bool enable_legacy_queue_map; uint32_t total_max_queue; uint32_t max_doorbell_slices; @@ -118,9 +119,6 @@ struct amdgpu_mes { uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t read_val_offs; - uint64_t read_val_gpu_addr; - uint32_t *read_val_ptr; uint32_t saved_flags; @@ -248,6 +246,18 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_reset_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; + bool use_mmio; + uint32_t queue_type; + uint32_t me_id; + uint32_t pipe_id; + uint32_t queue_id; + uint32_t xcc_id; + uint32_t vmid; +}; + struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -282,6 +292,8 @@ struct mes_resume_gang_input { struct mes_reset_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; + bool use_mmio; + uint32_t me_id; uint32_t pipe_id; uint32_t queue_id; uint64_t mqd_addr; @@ -360,6 +372,9 @@ struct amdgpu_mes_funcs { int (*reset_legacy_queue)(struct amdgpu_mes *mes, struct mes_reset_legacy_queue_input *input); + + int (*reset_hw_queue)(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -387,6 +402,9 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties *qprops, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int 
amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, + int me_id, int pipe_id, int queue_id, int vmid); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -396,7 +414,8 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, u64 gpu_addr, u64 seq); int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int vmid); + unsigned int vmid, + bool use_mmio); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, @@ -494,4 +513,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) memalloc_noreclaim_restore(mes->saved_flags); mutex_unlock(&mes->mutex_hidden); } + +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 95d676ee207f3..1ca9d4ed8063a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,8 +63,6 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 5e3faefc55109..b913f3f7ed95c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -681,6 +681,11 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); +int amdgpu_display_gem_fb_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); + int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); @@ -697,16 +702,19 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); + /* amdgpu_display.c */ void amdgpu_display_print_display_setup(struct drm_device *dev); int amdgpu_display_modeset_create_props(struct amdgpu_device *adev); int amdgpu_display_crtc_set_config(struct drm_mode_set *set, struct drm_modeset_acquire_ctx *ctx); + int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t page_flip_flags, uint32_t target, struct drm_modeset_acquire_ctx *ctx); + extern const struct drm_mode_config_funcs amdgpu_mode_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index f61d117b0cafe..79c2f807b9fe8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -101,6 +101,7 @@ struct amdgpu_nbio_funcs { int (*get_compute_partition_mode)(struct amdgpu_device *adev); u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); + bool (*is_nps_switch_requested)(struct amdgpu_device *adev); u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); void (*set_reg_remap)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e32161f6b67a3..2a4a7d2056f84 100644 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -58,6 +58,8 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + kfree(bo->dgma_addr); + amdgpu_bo_kunmap(bo); if (bo->tbo.base.import_attach) @@ -77,24 +79,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_destroy(tbo); } -static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; - struct amdgpu_bo_vm *vmbo; - - bo = shadow_bo->parent; - vmbo = to_amdgpu_bo_vm(bo); - /* in case amdgpu_device_recover_vram got NULL of bo->parent */ - if (!list_empty(&vmbo->shadow_list)) { - mutex_lock(&adev->shadow_list_lock); - list_del_init(&vmbo->shadow_list); - mutex_unlock(&adev->shadow_list_lock); - } - - amdgpu_bo_destroy(tbo); -} - /** * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo * @bo: buffer object to be checked @@ -108,8 +92,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_bo_destroy || - bo->destroy == &amdgpu_bo_user_destroy || - bo->destroy == &amdgpu_bo_vm_destroy) + bo->destroy == &amdgpu_bo_user_destroy) return true; return false; @@ -129,7 +112,23 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) struct ttm_placement *placement = &abo->placement; struct ttm_place *places = abo->placements; u64 flags = abo->flags; - u32 c = 0; + u32 c = 0, i; + + if ((domain & AMDGPU_GEM_DOMAIN_DGMA) && amdgpu_direct_gma_size) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DGMA; + places[c].flags = 0; + c++; + } + + if ((domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) && amdgpu_direct_gma_size) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DGMA_IMPORT; + places[c].flags = 0; + c++; + } if (domain & AMDGPU_GEM_DOMAIN_VRAM) { unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; @@ -151,7 +150,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn); - else + else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) places[c].flags |= TTM_PL_FLAG_TOPDOWN; if (abo->tbo.type == ttm_bo_type_kernel && @@ -227,6 +226,9 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS); + for (i = 0; i < c; i++) + if (flags & AMDGPU_GEM_CREATE_TOP_DOWN) + places[i].flags |= TTM_PL_FLAG_TOPDOWN; placement->num_placement = c; placement->placement = places; } @@ -635,11 +637,11 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); + r = amdgpu_ttm_clear_buffer(bo, amdkcl_ttm_resvp(&bo->tbo), &fence); if (unlikely(r)) goto fail_unreserve; - dma_resv_add_fence(bo->tbo.base.resv, fence, + dma_resv_add_fence(amdkcl_ttm_resvp(&bo->tbo), fence, DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } @@ -653,11 +655,20 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (bp->type == ttm_bo_type_device) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (((bp->flags & AMDGPU_GEM_CREATE_NO_EVICT) && amdgpu_no_evict) || + bp->domain & (AMDGPU_GEM_DOMAIN_DGMA | 
AMDGPU_GEM_DOMAIN_DGMA_IMPORT)) { + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + return r; + r = amdgpu_bo_pin(bo, bp->domain); + amdgpu_bo_unreserve(bo); + } + return 0; fail_unreserve: if (!bp->resv) - dma_resv_unlock(bo->tbo.base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(&bo->tbo)); amdgpu_bo_unref(&bo); return r; } @@ -722,52 +733,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, return r; } -/** - * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list - * - * @vmbo: BO that will be inserted into the shadow list - * - * Insert a BO to the shadow list. - */ -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev); - - mutex_lock(&adev->shadow_list_lock); - list_add_tail(&vmbo->shadow_list, &adev->shadow_list); - vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); - vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; - mutex_unlock(&adev->shadow_list_lock); -} - -/** - * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow - * - * @shadow: &amdgpu_bo shadow to be restored - * @fence: dma_fence associated with the operation - * - * Copies a buffer object's shadow content back to the object. - * This is used for recovering a buffer from its shadow in case of a gpu - * reset where vram context may be lost. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) - -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t shadow_addr, parent_addr; - - shadow_addr = amdgpu_bo_gpu_offset(shadow); - parent_addr = amdgpu_bo_gpu_offset(shadow->parent); - - return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, - amdgpu_bo_size(shadow), NULL, fence, - true, false, 0); -} - /** * amdgpu_bo_kmap - map an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be mapped @@ -787,7 +752,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; - r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + r = dma_resv_wait_timeout(amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; @@ -874,29 +839,22 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo) } /** - * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be pinned * @domain: domain to be pinned to - * @min_offset: the start of requested address range - * @max_offset: the end of requested address range * - * Pins the buffer object according to requested domain and address range. If - * the memory is unbound gart memory, binds the pages into gart table. Adjusts - * pin_count and pin_size accordingly. + * Pins the buffer object according to requested domain. If the memory is + * unbound gart memory, binds the pages into gart table. Adjusts pin_count and + * pin_size accordingly. * * Pinning means to lock pages in memory along with keeping them at a fixed * offset. It is required when a buffer can not be moved, for example, when * a display buffer is being scanned out. * - * Compared with amdgpu_bo_pin(), this function gives more flexibility on - * where to pin a buffer if there are specific restrictions on where a buffer - * must be located. - * * Returns: * 0 for success or a negative error code on failure. 
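+ * + * Typical usage (an illustrative sketch, error handling trimmed): + * + *	r = amdgpu_bo_reserve(bo, false); + *	if (r) + *		return r; + *	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM); + *	amdgpu_bo_unreserve(bo);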
*/ -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset) +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { false, false }; @@ -905,9 +863,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return -EPERM; - if (WARN_ON_ONCE(min_offset > max_offset)) - return -EINVAL; - /* Check domain to be pinned to against preferred domains */ if (bo->preferred_domains & domain) domain = bo->preferred_domains & domain; @@ -933,14 +888,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; ttm_bo_pin(&bo->tbo); - - if (max_offset != 0) { - u64 domain_start = amdgpu_ttm_domain_start(adev, - mem_type); - WARN_ON_ONCE(max_offset < - (amdgpu_bo_gpu_offset(bo) - domain_start)); - } - return 0; } @@ -949,25 +896,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, */ domain = amdgpu_bo_get_preferred_domain(adev, domain); +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN if (bo->tbo.base.import_attach) dma_buf_pin(bo->tbo.base.import_attach); +#endif /* force to pin into visible video ram */ if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, domain); for (i = 0; i < bo->placement.num_placement; i++) { - unsigned int fpfn, lpfn; - - fpfn = min_offset >> PAGE_SHIFT; - lpfn = max_offset >> PAGE_SHIFT; - - if (fpfn > bo->placements[i].fpfn) - bo->placements[i].fpfn = fpfn; - if (!bo->placements[i].lpfn || - (lpfn && lpfn < bo->placements[i].lpfn)) - bo->placements[i].lpfn = lpfn; - if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && bo->placements[i].mem_type == TTM_PL_VRAM) bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; @@ -993,24 +931,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return r; } -/** - * amdgpu_bo_pin - pin an &amdgpu_bo buffer object - * @bo: &amdgpu_bo buffer object to be pinned - * @domain: domain to be pinned to - * - * A simple wrapper to amdgpu_bo_pin_restricted(). - * Provides a simpler API for buffers that do not have any strict restrictions - * on where a buffer must be located. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) -{ - bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - return amdgpu_bo_pin_restricted(bo, domain, 0, 0); -} - /** * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object * @bo: &amdgpu_bo buffer object to be unpinned @@ -1029,8 +949,10 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.pin_count) return; +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN if (bo->tbo.base.import_attach) dma_buf_unpin(bo->tbo.base.import_attach); +#endif if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size); @@ -1069,6 +991,9 @@ static const char * const amdgpu_vram_names[] = { */ int amdgpu_bo_init(struct amdgpu_device *adev) { + /* set the default AGP aperture state */ + amdgpu_gmc_set_agp_default(adev, &adev->gmc); + /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ @@ -1153,7 +1078,7 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) struct amdgpu_bo_user *ubo; BUG_ON(bo->tbo.type == ttm_bo_type_kernel); - dma_resv_assert_held(bo->tbo.base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(&bo->tbo)); ubo = to_amdgpu_bo_user(bo); if (tiling_flags) @@ -1274,9 +1199,11 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, amdgpu_bo_kunmap(abo); +#ifdef HAVE_STRUCT_DMA_BUF_OPS_PIN if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); +#endif /* move_notify is called before move happens */ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, @@ -1361,8 +1288,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) /* We only remove the fence if the resv has individualized. 
*/ WARN_ON_ONCE(bo->type == ttm_bo_type_kernel - && bo->base.resv != &bo->base._resv); - if (bo->base.resv == &bo->base._resv) + && amdkcl_ttm_resvp(bo) != &amdkcl_ttm_resv(bo)); + if (amdkcl_ttm_resvp(bo) == &amdkcl_ttm_resv(bo)) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || @@ -1370,17 +1297,17 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) return; - if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) + if (WARN_ON_ONCE(!dma_resv_trylock(amdkcl_ttm_resvp(bo)))) return; - r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, amdkcl_ttm_resvp(bo), &fence, true); if (!WARN_ON(r)) { amdgpu_vram_mgr_set_cleared(bo->resource); amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); } - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } /** @@ -1445,7 +1372,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { - struct dma_resv *resv = bo->tbo.base.resv; + struct dma_resv *resv = amdkcl_ttm_resvp(&bo->tbo); int r; r = dma_resv_reserve_fences(resv, 1); @@ -1501,7 +1428,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - return amdgpu_bo_sync_wait_resv(adev, bo->tbo.base.resv, + return amdgpu_bo_sync_wait_resv(adev, amdkcl_ttm_resvp(&bo->tbo), AMDGPU_SYNC_NE_OWNER, owner, intr); } @@ -1518,11 +1445,12 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_SYSTEM); - WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && + WARN_ON_ONCE(!dma_resv_is_locked(amdkcl_ttm_resvp(&bo->tbo)) && !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_VRAM && !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); + WARN_ON_ONCE(bo->tbo.resource->mem_type == AMDGPU_PL_DGMA_IMPORT); return amdgpu_bo_gpu_offset_no_check(bo); } @@ -1598,7 +1526,7 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) unsigned int pin_count; u64 size; - if (dma_resv_trylock(bo->tbo.base.resv)) { + if (dma_resv_trylock(amdkcl_ttm_resvp(&bo->tbo))) { if (!bo->tbo.resource) { placement = "NONE"; } else { @@ -1609,6 +1537,12 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) else placement = "VRAM"; break; + case AMDGPU_PL_DGMA: + placement = "DGMA"; + break; + case AMDGPU_PL_DGMA_IMPORT: + placement = "DGMA_IMPORT"; + break; case TTM_PL_TT: placement = "GTT"; break; @@ -1633,7 +1567,7 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) break; } } - dma_resv_unlock(bo->tbo.base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(&bo->tbo)); } else { placement = "UNKNOWN"; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d7e27957013f3..525cd58090b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -112,11 +112,24 @@ struct amdgpu_bo { /* Constant after initialization */ struct amdgpu_bo *parent; +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED + struct amdgpu_mn *mn; +#endif + +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED #ifdef CONFIG_MMU_NOTIFIER 
struct mmu_interval_notifier notifier; #endif +#else + struct list_head mn_list; +#endif + struct kgd_mem *kfd_bo; + /* DGMA imported buffer info */ + void *dgma_addr; + phys_addr_t dgma_import_base; + /* * For GPUs with spatial partitioning, xcp partition number, -1 means * any partition. For other ASICs without spatial partition, always 0 @@ -131,13 +144,10 @@ struct amdgpu_bo_user { u64 metadata_flags; void *metadata; u32 metadata_size; - }; struct amdgpu_bo_vm { struct amdgpu_bo bo; - struct amdgpu_bo *shadow; - struct list_head shadow_list; struct amdgpu_vm_bo_base entries[]; }; @@ -196,6 +206,10 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) return AMDGPU_GEM_DOMAIN_OA; case AMDGPU_PL_DOORBELL: return AMDGPU_GEM_DOMAIN_DOORBELL; + case AMDGPU_PL_DGMA: + return AMDGPU_GEM_DOMAIN_DGMA; + case AMDGPU_PL_DGMA_IMPORT: + return AMDGPU_GEM_DOMAIN_DGMA_IMPORT; default: break; } @@ -275,22 +289,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; } -/** - * amdgpu_bo_shadowed - check if the BO is shadowed - * - * @bo: BO to be tested. - * - * Returns: - * NULL if not shadowed or else return a BO pointer. - */ -static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo) -{ - if (bo->tbo.type == ttm_bo_type_kernel) - return to_amdgpu_bo_vm(bo)->shadow; - - return NULL; -} - bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); @@ -322,8 +320,6 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 min_offset, u64 max_offset); void amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); @@ -349,9 +345,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats); -void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); -int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, - struct dma_fence **fence); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 0bb2466d539a9..675aa138ea112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -94,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n ref_div_max = min(128 / post_div, ref_div_max); /* get matching reference and feedback divider */ - *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); + *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 6e91ea1de5aaf..1017a30d3687d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -275,9 +275,13 @@ static void amdgpu_perf_read(struct perf_event *event) if ((!pe->adev->df.funcs) || (!pe->adev->df.funcs->pmc_get_count)) return; - +#ifdef HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG prev = local64_read(&hwc->prev_count); +#endif 
do { +#ifndef HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG + prev = local64_read(&hwc->prev_count); +#endif switch (hwc->config_base) { case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: @@ -288,8 +292,11 @@ static void amdgpu_perf_read(struct perf_event *event) count = 0; break; } +#ifdef HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count)); - +#else + } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); +#endif local64_add(count - prev, &event->count); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 189574d53ebd3..0b1e280ee2289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -159,9 +159,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp) return ret; } -static int psp_early_init(void *handle) +static int psp_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; psp->autoload_supported = true; @@ -421,9 +421,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, return ret; } -static int psp_sw_init(void *handle) +static int psp_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; int ret; struct psp_runtime_boot_cfg_entry boot_cfg_entry; @@ -527,9 +527,9 @@ static int psp_sw_init(void *handle) return ret; } -static int psp_sw_fini(void *handle) +static int psp_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; struct psp_gfx_cmd_resp *cmd = psp->cmd; @@ -639,6 +639,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "AUTOLOAD_RLC"; case GFX_CMD_ID_BOOT_CFG: return "BOOT_CFG"; + case GFX_CMD_ID_CONFIG_SQ_PERFMON: + return "CONFIG_SQ_PERFMON"; default: return "UNKNOWN CMD"; } @@ -1043,6 +1045,31 @@ static int psp_rl_load(struct amdgpu_device *adev) return ret; } +int psp_memory_partition(struct psp_context *psp, int mode) +{ + struct psp_gfx_cmd_resp *cmd; + int ret; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE; + cmd->cmd.cmd_memory_part.mode = mode; + + dev_info(psp->adev->dev, + "Requesting %d memory partition change through PSP", mode); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + dev_err(psp->adev->dev, + "PSP request failed to change to NPS%d mode\n", mode); + + release_psp_cmd_buf(psp); + + return ret; +} + int psp_spatial_partition(struct psp_context *psp, int mode) { struct psp_gfx_cmd_resp *cmd; @@ -2264,6 +2291,19 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp) } } +bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return false; + + if (psp->funcs && psp->funcs->is_reload_needed) + return psp->funcs->is_reload_needed(psp); + + return false; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -2853,7 +2893,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) if (ret) return ret; - /* Start rlc autoload after psp recieved all the gfx 
firmware */ + /* Start rlc autoload after psp received all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); @@ -2958,10 +2998,10 @@ static int psp_load_fw(struct amdgpu_device *adev) return ret; } -static int psp_hw_init(void *handle) +static int psp_hw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; mutex_lock(&adev->firmware.mutex); /* @@ -2987,9 +3027,9 @@ static int psp_hw_init(void *handle) return -EINVAL; } -static int psp_hw_fini(void *handle) +static int psp_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; if (psp->ta_fw) { @@ -3011,10 +3051,10 @@ static int psp_hw_fini(void *handle) return 0; } -static int psp_suspend(void *handle) +static int psp_suspend(struct amdgpu_ip_block *ip_block) { int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; if (adev->gmc.xgmi.num_physical_nodes > 1 && @@ -3074,10 +3114,10 @@ static int psp_suspend(void *handle) return ret; } -static int psp_resume(void *handle) +static int psp_resume(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; dev_info(adev->dev, "PSP is resuming...\n"); @@ -3425,9 +3465,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; - int err = 0; + const struct psp_firmware_header_v2_1 *sos_hdr_v2_1; + int fw_index, fw_bin_count, start_index = 0; + const struct psp_fw_bin_desc *fw_bin; uint8_t *ucode_array_start_addr; - int fw_index = 0; + int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); if (err) @@ -3478,15 +3520,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) case 2: sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; - if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); + + if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); err = -EINVAL; goto out; } - for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { - err = parse_sos_bin_descriptor(psp, - &sos_hdr_v2_0->psp_fw_bin[fw_index], + if (sos_hdr_v2_0->header.header_version_minor == 1) { + sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data; + + fw_bin = sos_hdr_v2_1->psp_fw_bin; + + if (psp_is_aux_sos_load_required(psp)) + start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + else + fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index); + + } else { + fw_bin = sos_hdr_v2_0->psp_fw_bin; + } + + for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) { + err = parse_sos_bin_descriptor(psp, fw_bin + fw_index, sos_hdr_v2_0); if (err) goto out; @@ -3719,8 +3776,44 @@ int psp_init_cap_microcode(struct psp_context *psp, const 
char *chip_name) return err; } +int psp_config_sq_perfmon(struct psp_context *psp, + uint32_t xcp_id, bool core_override_enable, + bool reg_override_enable, bool perfmon_override_enable) +{ + int ret; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (xcp_id > MAX_XCP) { + dev_err(psp->adev->dev, "invalid xcp_id %d\n", xcp_id); + return -EINVAL; + } + + if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) { + dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n", + amdgpu_ip_version(psp->adev, MP0_HWIP, 0)); + return -EINVAL; + } + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON; + cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id); + cmd->cmd.config_sq_perfmon.core_override = core_override_enable; + cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable; + cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%d core%d reg%d perfmon%d\n", + xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable); + + release_psp_cmd_buf(psp); + return ret; +} + static int psp_set_clockgating_state(void *handle, - enum amd_clockgating_state state) + enum amd_clockgating_state state) { return 0; } @@ -3981,6 +4074,7 @@ static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribu return adev->psp.sup_ifwi_up ? 0440 : 0; } +#ifdef HAVE_ATTRIBUTE_GROUP_IS_BIN_VISIBLE static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, struct bin_attribute *attr, int idx) @@ -3991,11 +4085,14 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, return adev->psp.sup_ifwi_up ? 0660 : 0; } +#endif const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, .bin_attrs = bin_flash_attrs, +#ifdef HAVE_ATTRIBUTE_GROUP_IS_BIN_VISIBLE .is_bin_visible = amdgpu_bin_flash_attr_is_visible, +#endif .is_visible = amdgpu_flash_attr_is_visible, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 74a96516c9138..567cb1f924ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,6 +138,8 @@ struct psp_funcs { int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); bool (*get_ras_capability)(struct psp_context *psp); + bool (*is_aux_sos_load_required)(struct psp_context *psp); + bool (*is_reload_needed)(struct psp_context *psp); }; struct ta_funcs { @@ -464,6 +466,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->fatal_error_recovery_quirk ? \ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) +#define psp_is_aux_sos_load_required(psp) \ + ((psp)->funcs->is_aux_sos_load_required ? 
(psp)->funcs->is_aux_sos_load_required((psp)) : 0) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -548,9 +553,15 @@ int psp_load_fw_list(struct psp_context *psp, void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); int psp_spatial_partition(struct psp_context *psp, int mode); +int psp_memory_partition(struct psp_context *psp, int mode); int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); bool amdgpu_psp_get_ras_capability(struct psp_context *psp); + +int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id, + bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); +bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b9..38face981c3e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 61a2f386d9fbe..71e8eafbbfbce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -882,7 +882,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, if (ret) return ret; - /* gfx block ras dsiable cmd must send to ras-ta */ + /* gfx block ras disable cmd must send to ras-ta */ if (head->block == AMDGPU_RAS_BLOCK__GFX) con->features |= BIT(head->block); @@ -1214,6 +1214,42 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, } } +static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev, + struct ras_query_if *query_if, + struct ras_err_data *err_data, + struct ras_query_context *qctx) +{ + unsigned long new_ue, new_ce, new_de; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head); + const char *blk_name = get_ras_block_str(&query_if->head); + u64 event_id = qctx->evid.event_id; + + new_ce = err_data->ce_count - obj->err_data.ce_count; + new_ue = err_data->ue_count - obj->err_data.ue_count; + new_de = err_data->de_count - obj->err_data.de_count; + + if (new_ce) { + RAS_EVENT_LOG(adev, event_id, "%lu correctable hardware errors " + "detected in %s block\n", + new_ce, + blk_name); + } + + if (new_ue) { + RAS_EVENT_LOG(adev, event_id, "%lu uncorrectable hardware errors " + "detected in %s block\n", + new_ue, + blk_name); + } + + if (new_de) { + RAS_EVENT_LOG(adev, event_id, "%lu deferred hardware errors " + "detected in %s block\n", + new_de, + blk_name); + } +} + static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data) { struct ras_err_node *err_node; @@ -1237,6 +1273,15 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s } } +static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj, + struct ras_err_data *err_data) +{ + /* Host reports absolute counts */ + obj->err_data.ue_count = err_data->ue_count; + obj->err_data.ce_count = err_data->ce_count; + obj->err_data.de_count = err_data->de_count; +} + static struct ras_manager *get_ras_manager(struct 
amdgpu_device *adev, enum amdgpu_ras_block blk)
 {
 	struct ras_common_if head;
@@ -1323,7 +1368,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
 	if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
 		return -EINVAL;
 
-	if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+	if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+		return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+	} else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
 		if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
 			amdgpu_ras_get_ecc_info(adev, err_data);
 		} else {
@@ -1405,14 +1452,22 @@ static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
 	if (ret)
 		goto out_fini_err_data;
 
-	amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+	if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+		amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+		amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+	} else {
+		/* Host provides absolute error counts. First generate the report
+		 * using the previous VF internal count against the new host count.
+		 * Then update the VF internal count.
+		 */
+		amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+		amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+	}
 
 	info->ue_count = obj->err_data.ue_count;
 	info->ce_count = obj->err_data.ce_count;
 	info->de_count = obj->err_data.de_count;
 
-	amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
-
 out_fini_err_data:
 	amdgpu_ras_error_data_fini(&err_data);
 
@@ -2101,6 +2156,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
 	/* Fatal error events are handled on host side */
 	if (amdgpu_sriov_vf(adev))
 		return;
+	/*
+	 * If the current interrupt is caused by a non-fatal RAS error, skip
+	 * the check for fatal errors. For fatal errors, the FED status of all
+	 * devices in the XGMI hive gets set when the first device receives the
+	 * fatal error interrupt. The error is propagated to the other devices
+	 * as well, so make sure to ack the interrupt regardless of FED status.
+ */ + if (!amdgpu_ras_get_fed_status(adev) && + amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) + return; if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) @@ -2130,6 +2195,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * if (ret) return; + amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block); /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler @@ -2180,6 +2246,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "Poison is created\n"); +#ifdef HAVE_KFIFO_PUT_NON_POINTER if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev); @@ -2188,6 +2255,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj wake_up(&con->page_retirement_wq); } +#endif } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -2845,6 +2913,7 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +#ifdef HAVE_KFIFO_PUT_NON_POINTER int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) @@ -2876,7 +2945,9 @@ static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, return kfifo_get(&con->poison_fifo, poison_msg); } +#endif +#ifdef HAVE_RADIX_TREE_ITER_DELETE static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { mutex_init(&ecc_log->lock); @@ -2905,6 +2976,7 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) ecc_log->de_queried_count = 0; ecc_log->prev_de_queried_count = 0; } +#endif static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con, uint32_t delayed_ms) @@ -3013,6 +3085,7 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, return 0; } +#ifdef HAVE_KFIFO_PUT_NON_POINTER static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -3067,6 +3140,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, return 0; } +#endif static int amdgpu_ras_page_retirement_thread(void *param) { @@ -3099,6 +3173,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) } } while (atomic_read(&con->poison_creation_count)); +#ifdef HAVE_KFIFO_PUT_NON_POINTER if (ret != -EIO) { msg_count = kfifo_len(&con->poison_fifo); if (msg_count) { @@ -3141,12 +3216,48 @@ static int amdgpu_ras_page_retirement_thread(void *param) /* Wake up work to save bad pages to eeprom */ schedule_delayed_work(&con->page_retirement_dwork, 0); } +#endif } return 0; } -int amdgpu_ras_recovery_init(struct amdgpu_device *adev) +int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + + if (!con || amdgpu_sriov_vf(adev)) + return 0; + + ret = amdgpu_ras_eeprom_init(&con->eeprom_control); + + if (ret) + return ret; + + /* HW not usable */ + if (amdgpu_ras_is_rma(adev)) + return -EHWPOISON; + + if (con->eeprom_control.ras_num_recs) { + ret = amdgpu_ras_load_bad_pages(adev); + if (ret) + return ret; + + amdgpu_dpm_send_hbm_bad_pages_num( + adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag( + adev, 
con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } + } + + return ret; +} + +int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; @@ -3181,31 +3292,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); - /* Todo: During test the SMU might fail to read the eeprom through I2C - * when the GPU is pending on XGMI reset during probe time - * (Mostly after second bus reset), skip it now - */ - if (adev->gmc.xgmi.pending_reset) - return 0; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - /* - * This calling fails when is_rma is true or - * ret != 0. - */ - if (amdgpu_ras_is_rma(adev) || ret) - goto free; - - if (con->eeprom_control.ras_num_recs) { - ret = amdgpu_ras_load_bad_pages(adev); + if (init_bp_info) { + ret = amdgpu_ras_init_badpage_info(adev); if (ret) goto free; - - amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); - - if (con->update_channel_flag == true) { - amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); - con->update_channel_flag = false; - } } mutex_init(&con->page_rsv_lock); @@ -3222,12 +3312,15 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement); +#ifdef HAVE_RADIX_TREE_ITER_DELETE amdgpu_ras_ecc_log_init(&con->umc_ecc_log); +#endif #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) amdgpu_register_bad_pages_mca_notifier(adev); #endif + return 0; free: @@ -3278,7 +3371,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) cancel_delayed_work_sync(&con->page_retirement_dwork); +#ifdef HAVE_RADIX_TREE_ITER_DELETE amdgpu_ras_ecc_log_fini(&con->umc_ecc_log); +#endif mutex_lock(&con->recovery_lock); con->eh_data = NULL; @@ -3438,6 +3533,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (!amdgpu_ras_asic_supported(adev)) return; + if (amdgpu_sriov_vf(adev)) { + if (amdgpu_virt_get_ras_capability(adev)) + goto init_ras_enabled_flag; + } + /* query ras capability from psp */ if (amdgpu_psp_get_ras_capability(&adev->psp)) goto init_ras_enabled_flag; @@ -3468,6 +3568,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) /* aca is disabled by default */ adev->aca.is_enabled = false; + + /* bad page feature is not applicable to specific app platform */ + if (adev->gmc.is_app_apu && + amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0)) + amdgpu_bad_page_threshold = 0; } static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -3905,7 +4010,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) } /* Guest side doesn't need init ras feature */ - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev)) return 0; list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { @@ -4003,16 +4108,56 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) if (!ras) return false; - return atomic_read(&ras->fed); + return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); } void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) { struct amdgpu_ras *ras; + ras = amdgpu_ras_get_context(adev); + if (ras) { + if (status) + set_bit(AMDGPU_RAS_BLOCK__LAST, 
&ras->ras_err_state); + else + clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + } +} + +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + ras->ras_err_state = 0; +} + +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras *ras; + ras = amdgpu_ras_get_context(adev); if (ras) - atomic_set(&ras->fed, !!status); + set_bit(block, &ras->ras_err_state); +} + +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) { + if (block == AMDGPU_RAS_BLOCK__ANY) + return (ras->ras_err_state != 0); + else + return test_bit(block, &ras->ras_err_state) || + test_bit(AMDGPU_RAS_BLOCK__LAST, + &ras->ras_err_state); + } + + return false; } static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) @@ -4165,8 +4310,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, * and error occurred in DramECC (Extended error code = 0) then only * process the error, else bail out. */ +#ifdef HAVE_SMCA_UMC_V2 if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) && (XEC(m->status, 0x3f) == 0x0))) +#endif return NOTIFY_DONE; /* @@ -4289,8 +4436,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; } - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + int hive_ras_recovery = 0; + + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + /* In the case of multiple GPUs, after a GPU has started + * resetting all GPUs on hive, other GPUs do not need to + * trigger GPU reset again. + */ + if (!hive_ras_recovery) + amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work); + else + atomic_set(&ras->in_recovery, 0); + } else { + flush_work(&ras->recovery_work); amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work); + } + return 0; } @@ -4353,11 +4519,14 @@ bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, return false; } - if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) + if (amdgpu_sriov_vf(adev)) { + *error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY; + } else if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) { *error_query_mode = (con->is_aca_debug_mode) ? 
AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; - else + } else { *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + } return true; } @@ -4580,7 +4749,11 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +#ifdef HAVE_LIST_CMP_FUNC_IS_CONST_PARAM static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +#else +static int ras_err_info_cmp(void *priv, struct list_head *a, struct list_head *b) +#endif { struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 669720a9c60af..b13debcf48ee3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -99,7 +99,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, - AMDGPU_RAS_BLOCK__LAST + AMDGPU_RAS_BLOCK__LAST, + AMDGPU_RAS_BLOCK__ANY = -1 }; enum amdgpu_ras_mca_block { @@ -365,6 +366,7 @@ enum amdgpu_ras_error_query_mode { AMDGPU_RAS_INVALID_ERROR_QUERY = 0, AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, + AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3, }; /* ras error status reisger fields */ @@ -557,8 +559,8 @@ struct amdgpu_ras { struct ras_ecc_log_info umc_ecc_log; struct delayed_work page_retirement_dwork; - /* Fatal error detected flag */ - atomic_t fed; + /* ras errors detected */ + unsigned long ras_err_state; /* RAS event manager */ struct ras_event_manager __event_mgr; @@ -736,8 +738,8 @@ struct amdgpu_ras_block_hw_ops { * 8: feature disable */ - -int amdgpu_ras_recovery_init(struct amdgpu_device *adev); +int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev); +int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info); void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); @@ -951,6 +953,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block); +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev); +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index aab8077e50988..f28f6b4ba765d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -58,7 +58,7 @@ #define EEPROM_I2C_MADDR_4 0x40000 /* - * The 2 macros bellow represent the actual size in bytes that + * The 2 macros below represent the actual size in bytes that * those entities occupy in the EEPROM memory. * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which * uses uint64 to store 6b fields such as retired_page. 
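The ras_err_state rework in the hunks above folds the old single fatal-error flag (the atomic_t fed) and the new per-block poison tracking into one unsigned long bitmap: bit AMDGPU_RAS_BLOCK__LAST doubles as the fatal-error (FED) bit, and the pseudo-block AMDGPU_RAS_BLOCK__ANY (-1) asks whether any bit at all is set. A minimal userspace sketch of those semantics follows; the three-block enum and the function names here are hypothetical stand-ins, and the driver itself uses atomic set_bit()/test_bit() on the amdgpu_ras context's ras_err_state field rather than plain bit arithmetic.

#include <stdbool.h>
#include <stdio.h>

enum ras_block { BLK_UMC, BLK_SDMA, BLK_GFX, BLK_LAST, BLK_ANY = -1 };

static unsigned long ras_err_state;

/* models amdgpu_ras_set_fed(): bit BLK_LAST is the fatal-error flag */
static void set_fed(bool on)
{
	if (on)
		ras_err_state |= 1UL << BLK_LAST;
	else
		ras_err_state &= ~(1UL << BLK_LAST);
}

/* models amdgpu_ras_set_err_poison(): one sticky bit per block */
static void set_err_poison(enum ras_block blk)
{
	ras_err_state |= 1UL << blk;
}

/* models amdgpu_ras_is_err_state(): a block is in error state if its own
 * bit is set, or if the fatal bit is set, which covers every block at once
 */
static bool is_err_state(int blk)
{
	if (blk == BLK_ANY)
		return ras_err_state != 0;
	return (ras_err_state & (1UL << blk)) ||
	       (ras_err_state & (1UL << BLK_LAST));
}

int main(void)
{
	set_err_poison(BLK_SDMA);
	printf("SDMA:%d GFX:%d ANY:%d\n", is_err_state(BLK_SDMA),
	       is_err_state(BLK_GFX), is_err_state(BLK_ANY));
	set_fed(true);	/* fatal: every block now reports an error state */
	printf("GFX after FED:%d\n", is_err_state(BLK_GFX));
	return 0;
}

Note how setting the FED bit makes every per-block query report an error state; this is what lets amdgpu_ras_interrupt_fatal_error_handler() distinguish "some block already flagged a non-fatal error" from a genuine first fatal-error interrupt.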
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 66c1a868c0e16..647ffb69754eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,6 +26,161 @@
 #include "sienna_cichlid.h"
 #include "smu_v13_0_10.h"
 
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
+{
+	int i, r;
+
+	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+		if (!adev->ip_blocks[i].status.valid)
+			continue;
+		if (!adev->ip_blocks[i].status.hw)
+			continue;
+		/* displays are handled in phase1 */
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+			continue;
+
+		r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]);
+		/* XXX handle errors */
+		if (r) {
+			dev_err(adev->dev, "suspend of IP block <%s> failed %d",
+				adev->ip_blocks[i].version->funcs->name, r);
+		}
+		adev->ip_blocks[i].status.hw = false;
+	}
+
+	/* The VCN FW shared region lives in the framebuffer, and some flags in
+	 * that region are initialized during sw_init. Make sure the region is
+	 * backed up.
+	 */
+	amdgpu_vcn_save_vcpu_bo(adev);
+
+	return 0;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev;
+	int r;
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		amdgpu_unregister_gpu_instance(tmp_adev);
+		r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+		if (r) {
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: prepare for reset failed");
+			return r;
+		}
+	}
+
+	return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev = NULL;
+	int r;
+
+	r = amdgpu_device_reinit_after_reset(reset_context);
+	if (r)
+		return r;
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		if (!tmp_adev->kfd.init_complete) {
+			kgd2kfd_init_zone_device(tmp_adev);
+			amdgpu_amdkfd_device_init(tmp_adev);
+			amdgpu_amdkfd_drm_client_create(tmp_adev);
+		}
+	}
+
+	return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+	struct amdgpu_reset_control *reset_ctl,
+	struct amdgpu_reset_context *reset_context)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *tmp_adev = NULL;
+	int r;
+
+	dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+		tmp_adev->reset_cntl->active_reset =
+			amdgpu_asic_reset_method(adev);
+	}
+	r = 0;
+	/* Mode1 reset needs to be triggered on all devices together */
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		/* For XGMI run all resets in parallel to speed up the process */
+		if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+			r = -EALREADY;
+		if (r) {
+			dev_err(tmp_adev->dev,
+				"xgmi reset on init: reset failed with error %d",
+				r);
+			break;
+		}
+	}
+
+	/* For XGMI wait for all resets to complete before proceeding */
+	if (!r) {
+		list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+			flush_work(&tmp_adev->xgmi_reset_work);
+			r = tmp_adev->asic_reset_res;
+			if (r)
+				break;
+		}
+	}
+
+	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+		mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+		tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+	}
+
+	return r;
+}
+
+int amdgpu_reset_do_xgmi_reset_on_init(
+	struct amdgpu_reset_context *reset_context)
+{
+	struct list_head *reset_device_list = reset_context->reset_device_list;
+	struct amdgpu_device *adev;
+	int r;
+
+	if (!reset_device_list || list_empty(reset_device_list) ||
+	    list_is_singular(reset_device_list))
+		return -EINVAL;
+
+	adev = list_first_entry(reset_device_list, struct amdgpu_device,
+				reset_list);
+	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+	if (r)
+		return r;
+
+	r = amdgpu_reset_perform_reset(adev, reset_context);
+
+	return r;
+}
+
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+	.reset_method = AMD_RESET_METHOD_ON_INIT,
+	.prepare_env = NULL,
+	.prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+	.perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+	.restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+	.restore_env = NULL,
+	.do_reset = NULL,
+};
+
 int amdgpu_reset_init(struct amdgpu_device *adev)
 {
 	int ret = 0;
@@ -119,6 +274,14 @@ void amdgpu_reset_destroy_reset_domain(struct kref *ref)
 	kvfree(reset_domain);
 }
 
+static void amdgpu_reset_domain_cancel_all_work(struct work_struct *work)
+{
+	struct amdgpu_reset_domain *reset_domain =
+		container_of(work, struct amdgpu_reset_domain, clear);
+
+	reset_domain->drain = false;
+}
+
 struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
 							     char *wq_name)
 {
@@ -141,6 +304,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
 
 	}
 
+	INIT_WORK(&reset_domain->clear, amdgpu_reset_domain_cancel_all_work);
 	atomic_set(&reset_domain->in_gpu_reset, 0);
 	atomic_set(&reset_domain->reset_res, 0);
 	init_rwsem(&reset_domain->sem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb54..977b2dd2205ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -98,6 +98,8 @@ struct amdgpu_reset_domain {
 	struct rw_semaphore sem;
 	atomic_t in_gpu_reset;
 	atomic_t reset_res;
+	struct work_struct clear;
+	bool drain;
 };
 
 int amdgpu_reset_init(struct amdgpu_device *adev);
@@ -136,6 +138,26 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
 	return queue_work(domain->wq, work);
 }
 
+static inline void amdgpu_reset_domain_clear_pending(struct amdgpu_reset_domain *domain)
+{
+	domain->drain = true;
+	/* Queue one more work item to the domain queue. Until this work is
+	 * finished, the domain is in drain mode.
+ */ + queue_work(domain->wq, &domain->clear); +} + +static inline bool amdgpu_reset_domain_in_drain_mode(struct amdgpu_reset_domain *domain) +{ + return domain->drain; +} + +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); @@ -147,4 +169,9 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler = (*reset_ctl->reset_handlers)[i]); \ ++i) + +extern struct amdgpu_reset_handler xgmi_reset_on_init_handler; +int amdgpu_reset_do_xgmi_reset_on_init( + struct amdgpu_reset_context *reset_context); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 690976665cf69..5b83c87389072 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -589,6 +589,7 @@ static const struct file_operations amdgpu_debugfs_mqd_fops = { .llseek = default_llseek }; +#ifdef DEFINE_DEBUGFS_ATTRIBUTE static int amdgpu_debugfs_ring_error(void *data, u64 val) { struct amdgpu_ring *ring = data; @@ -599,7 +600,7 @@ static int amdgpu_debugfs_ring_error(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL, amdgpu_debugfs_ring_error, "%lld\n"); - +#endif #endif void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, @@ -622,10 +623,11 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ring->mqd_size); } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE sprintf(name, "amdgpu_error_%s", ring->name); debugfs_create_file(name, 0200, root, ring, &amdgpu_debugfs_error_fops); - +#endif #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 582053f1cd565..f93f510022018 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -235,6 +235,8 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + void (*emit_cleaner_shader)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -334,6 +336,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index d234b7ccfaafc..1c66da1c3fb42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -410,7 +410,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) return; amdgpu_ring_mux_end_ib(mux, ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 863b2a34b2d64..f9ff493c100e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -22,7 +22,6 @@
  * Authors: Andres Rodriguez
  */
 
-#include <linux/fdtable.h>
 #include <linux/file.h>
 #include <linux/pid.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
new file mode 100644
index 0000000000000..432072b28f5ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chunming Zhou
+ */
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/dma-fence.h>
+#include "amdgpu.h"
+#include "amdgpu_sem.h"
+
+#define to_amdgpu_ctx_entity(e)	\
+	container_of((e), struct amdgpu_ctx_entity, entity)
+
+static int amdgpu_sem_entity_add(struct amdgpu_fpriv *fpriv,
+				 struct drm_amdgpu_sem_in *in,
+				 struct amdgpu_sem *sem);
+
+static void amdgpu_sem_core_free(struct kref *kref)
+{
+	struct amdgpu_sem_core *core = container_of(
+		kref, struct amdgpu_sem_core, kref);
+
+	dma_fence_put(core->fence);
+	mutex_destroy(&core->lock);
+	kfree(core);
+}
+
+static void amdgpu_sem_free(struct kref *kref)
+{
+	struct amdgpu_sem *sem = container_of(
+		kref, struct amdgpu_sem, kref);
+
+	kref_put(&sem->base->kref, amdgpu_sem_core_free);
+	kfree(sem);
+}
+
+static inline void amdgpu_sem_get(struct amdgpu_sem *sem)
+{
+	if (sem)
+		kref_get(&sem->kref);
+}
+
+void amdgpu_sem_put(struct amdgpu_sem *sem)
+{
+	if (sem)
+		kref_put(&sem->kref, amdgpu_sem_free);
+}
+
+static int amdgpu_sem_release(struct inode *inode, struct file *file)
+{
+	struct amdgpu_sem_core *core = file->private_data;
+
+	/* set the core->file to null if file got released */
+	mutex_lock(&core->lock);
+	core->file = NULL;
+	mutex_unlock(&core->lock);
+
+	kref_put(&core->kref, amdgpu_sem_core_free);
+	return 0;
+}
+
+static unsigned int amdgpu_sem_poll(struct file *file, poll_table *wait)
+{
+	return 0;
+}
+
+static long amdgpu_sem_file_ioctl(struct file *file, unsigned int cmd,
+				  unsigned long arg)
+{
+	return 0;
+}
+
+static const struct file_operations amdgpu_sem_fops = {
+	.release = amdgpu_sem_release,
+	.poll = amdgpu_sem_poll,
+	.unlocked_ioctl = amdgpu_sem_file_ioctl,
+	.compat_ioctl = amdgpu_sem_file_ioctl,
+};
+
+
+static inline struct amdgpu_sem *amdgpu_sem_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct amdgpu_sem *sem;
+ + spin_lock(&fpriv->sem_handles_lock); + + /* Check if we currently have a reference on the object */ + sem = idr_find(&fpriv->sem_handles, handle); + amdgpu_sem_get(sem); + + spin_unlock(&fpriv->sem_handles_lock); + + return sem; +} + +static struct amdgpu_sem_core *amdgpu_sem_core_alloc(void) +{ + struct amdgpu_sem_core *core; + + core = kzalloc(sizeof(*core), GFP_KERNEL); + if (!core) + return NULL; + + kref_init(&core->kref); + mutex_init(&core->lock); + return core; +} + +static struct amdgpu_sem *amdgpu_sem_alloc(void) +{ + struct amdgpu_sem *sem; + + sem = kzalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + kref_init(&sem->kref); + INIT_LIST_HEAD(&sem->list); + + return sem; +} + +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle) +{ + struct amdgpu_sem *sem; + struct amdgpu_sem_core *core; + int ret; + + sem = amdgpu_sem_alloc(); + core = amdgpu_sem_core_alloc(); + if (!sem || !core) { + kfree(sem); + kfree(core); + return -ENOMEM; + } + + sem->base = core; + + idr_preload(GFP_KERNEL); + spin_lock(&fpriv->sem_handles_lock); + + ret = idr_alloc(&fpriv->sem_handles, sem, 1, 0, GFP_NOWAIT); + + spin_unlock(&fpriv->sem_handles_lock); + idr_preload_end(); + + if (ret < 0) + return ret; + + *handle = ret; + return 0; +} + +static int amdgpu_sem_signal(struct amdgpu_fpriv *fpriv, + u32 handle, struct dma_fence *fence) +{ + struct amdgpu_sem *sem; + struct amdgpu_sem_core *core; + + sem = amdgpu_sem_lookup(fpriv, handle); + if (!sem) + return -EINVAL; + + core = sem->base; + mutex_lock(&core->lock); + dma_fence_put(core->fence); + core->fence = dma_fence_get(fence); + mutex_unlock(&core->lock); + + amdgpu_sem_put(sem); + return 0; +} + +static int amdgpu_sem_wait(struct amdgpu_fpriv *fpriv, + struct drm_amdgpu_sem_in *in) +{ + struct amdgpu_sem *sem; + int ret; + + sem = amdgpu_sem_lookup(fpriv, in->handle); + if (!sem) + return -EINVAL; + + ret = amdgpu_sem_entity_add(fpriv, in, sem); + amdgpu_sem_put(sem); + + return ret; +} + +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv, + int fd, u32 *handle) +{ + struct file *file = fget(fd); + struct amdgpu_sem *sem; + struct amdgpu_sem_core *core; + int ret; + + if (!file) + return -EINVAL; + + core = file->private_data; + if (!core) { + fput(file); + return -EINVAL; + } + + kref_get(&core->kref); + sem = amdgpu_sem_alloc(); + if (!sem) { + ret = -ENOMEM; + goto err_sem; + } + + sem->base = core; + + idr_preload(GFP_KERNEL); + spin_lock(&fpriv->sem_handles_lock); + + ret = idr_alloc(&fpriv->sem_handles, sem, 1, 0, GFP_NOWAIT); + + spin_unlock(&fpriv->sem_handles_lock); + idr_preload_end(); + + if (ret < 0) + goto err_out; + + *handle = ret; + fput(file); + return 0; +err_sem: + kref_put(&core->kref, amdgpu_sem_core_free); +err_out: + amdgpu_sem_put(sem); + fput(file); + return ret; + +} + +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv, + u32 handle, int *fd) +{ + struct amdgpu_sem *sem; + struct amdgpu_sem_core *core; + int ret; + + sem = amdgpu_sem_lookup(fpriv, handle); + if (!sem) + return -EINVAL; + + core = sem->base; + kref_get(&core->kref); + mutex_lock(&core->lock); + if (!core->file) { + core->file = anon_inode_getfile("sem_file", + &amdgpu_sem_fops, + core, 0); + if (IS_ERR(core->file)) { + mutex_unlock(&core->lock); + ret = -ENOMEM; + goto err_put_sem; + } + } else { + get_file(core->file); + } + mutex_unlock(&core->lock); + + ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + goto err_put_file; + + fd_install(ret, core->file); + + *fd = ret; + amdgpu_sem_put(sem); + + return 0; 
+
+err_put_file:
+	fput(core->file);
+err_put_sem:
+	kref_put(&core->kref, amdgpu_sem_core_free);
+	amdgpu_sem_put(sem);
+	return ret;
+}
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct amdgpu_sem *sem = amdgpu_sem_lookup(fpriv, handle);
+	if (!sem)
+		return;
+
+	spin_lock(&fpriv->sem_handles_lock);
+	idr_remove(&fpriv->sem_handles, handle);
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	kref_put(&sem->kref, amdgpu_sem_free);
+	kref_put(&sem->kref, amdgpu_sem_free);
+}
+
+static struct dma_fence *amdgpu_sem_get_fence(struct amdgpu_fpriv *fpriv,
+					      struct drm_amdgpu_sem_in *in)
+{
+	struct drm_sched_entity *entity;
+	struct amdgpu_ctx *ctx;
+	struct dma_fence *fence;
+	uint32_t ctx_id, ip_type, ip_instance, ring;
+	int r;
+
+	ctx_id = in->ctx_id;
+	ip_type = in->ip_type;
+	ip_instance = in->ip_instance;
+	ring = in->ring;
+	ctx = amdgpu_ctx_get(fpriv, ctx_id);
+	if (!ctx)
+		return NULL;
+	r = amdgpu_ctx_get_entity(ctx, ip_type,
+				  ip_instance, ring, &entity);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return NULL;
+	}
+	/* get the last fence of this entity */
+	fence = amdgpu_ctx_get_fence(ctx, entity, in->seq);
+	amdgpu_ctx_put(ctx);
+
+	return fence;
+}
+
+static int amdgpu_sem_entity_add(struct amdgpu_fpriv *fpriv,
+				 struct drm_amdgpu_sem_in *in,
+				 struct amdgpu_sem *sem)
+{
+	struct amdgpu_ctx *ctx;
+	struct amdgpu_sem_dep *dep;
+	struct drm_sched_entity *entity;
+	struct amdgpu_ctx_entity *centity;
+	uint32_t ctx_id, ip_type, ip_instance, ring;
+	int r;
+
+	ctx_id = in->ctx_id;
+	ip_type = in->ip_type;
+	ip_instance = in->ip_instance;
+	ring = in->ring;
+	ctx = amdgpu_ctx_get(fpriv, ctx_id);
+	if (!ctx)
+		return -EINVAL;
+	r = amdgpu_ctx_get_entity(ctx, ip_type,
+				  ip_instance, ring, &entity);
+	if (r)
+		goto err;
+
+	dep = kzalloc(sizeof(*dep), GFP_KERNEL);
+	if (!dep) {
+		r = -ENOMEM;
+		goto err;
+	}
+
+	INIT_LIST_HEAD(&dep->list);
+	dep->fence = dma_fence_get(sem->base->fence);
+
+	centity = to_amdgpu_ctx_entity(entity);
+	mutex_lock(&centity->sem_lock);
+	list_add(&dep->list, &centity->sem_dep_list);
+	mutex_unlock(&centity->sem_lock);
+
+err:
+	amdgpu_ctx_put(ctx);
+	return r;
+}
+
+int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity,
+		      struct amdgpu_sync *sync)
+{
+	struct amdgpu_sem_dep *dep, *tmp;
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	int r = 0;
+
+	if (list_empty(&centity->sem_dep_list))
+		return 0;
+
+	mutex_lock(&centity->sem_lock);
+	list_for_each_entry_safe(dep, tmp, &centity->sem_dep_list,
+				 list) {
+		r = amdgpu_sync_fence(sync, dep->fence);
+		if (r)
+			goto err;
+		dma_fence_put(dep->fence);
+		list_del_init(&dep->list);
+		kfree(dep);
+	}
+err:
+	mutex_unlock(&centity->sem_lock);
+	return r;
+}
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp)
+{
+	union drm_amdgpu_sem *args = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct dma_fence *fence;
+	int r = 0;
+
+	switch (args->in.op) {
+	case AMDGPU_SEM_OP_CREATE_SEM:
+		r = amdgpu_sem_create(fpriv, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_WAIT_SEM:
+		r = amdgpu_sem_wait(fpriv, &args->in);
+		break;
+	case AMDGPU_SEM_OP_SIGNAL_SEM:
+		fence = amdgpu_sem_get_fence(fpriv, &args->in);
+		if (!fence) {
+			r = -EINVAL;
+			break;
+		}
+		r = amdgpu_sem_signal(fpriv, args->in.handle, fence);
+		dma_fence_put(fence);
+		break;
+	case AMDGPU_SEM_OP_IMPORT_SEM:
+		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_EXPORT_SEM:
+		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
+		break;
+	case AMDGPU_SEM_OP_DESTROY_SEM:
+		amdgpu_sem_destroy(fpriv, args->in.handle);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h
new file mode 100644
index 0000000000000..dbbb9d4540233
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Chunming Zhou
+ *
+ */
+
+
+#ifndef _LINUX_AMDGPU_SEM_H
+#define _LINUX_AMDGPU_SEM_H
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/dma-fence.h>
+
+struct amdgpu_sem_core {
+	struct file *file;
+	struct kref kref;
+	struct dma_fence *fence;
+	struct mutex lock;
+};
+
+struct amdgpu_sem_dep {
+	struct dma_fence *fence;
+	struct list_head list;
+};
+
+struct amdgpu_sem {
+	struct amdgpu_sem_core *base;
+	struct kref kref;
+	struct list_head list;
+};
+
+void amdgpu_sem_put(struct amdgpu_sem *sem);
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+
+int amdgpu_sem_add_cs(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity,
+		      struct amdgpu_sync *sync);
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
+
+#endif /* _LINUX_AMDGPU_SEM_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bdf1ef825d896..8dc23cbfb039a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -29,8 +29,6 @@
  * Christian König
  */
 
-#include <linux/dma-fence-chain.h>
-
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -260,6 +258,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	return 0;
 }
 
+/**
+ * amdgpu_sync_kfd - sync to KFD fences
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Extract all KFD fences and add them to the sync object.
+ */ +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) +{ + struct dma_resv_iter cursor; + struct dma_fence *f; + int r = 0; + + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, f) { + void *fence_owner = amdgpu_sync_get_owner(f); + + if (fence_owner != AMDGPU_FENCE_OWNER_KFD) + continue; + + r = amdgpu_sync_fence(sync, f); + if (r) + break; + } + dma_resv_iter_end(&cursor); + + return r; +} + /* Free the entry back to the slab */ static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) { @@ -391,11 +419,13 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) } dma_fence_get(f); +#ifdef HAVE_STRUCT_XARRAY r = drm_sched_job_add_dependency(&job->base, f); if (r) { dma_fence_put(f); return r; } +#endif } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cf1e9e858efdc..e3272dce798d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); +int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 383fce40d4dd7..5a52e45f3ba50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -178,10 +178,10 @@ TRACE_EVENT(amdgpu_cs_ioctl, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline); + __amdkcl_assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring); + __amdkcl_assign_str(ring, to_amdgpu_ring(job->base.sched)->name); __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -203,10 +203,10 @@ TRACE_EVENT(amdgpu_sched_run_job, TP_fast_assign( __entry->sched_job_id = job->base.id; - __assign_str(timeline); + __amdkcl_assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)); __entry->context = job->base.s_fence->finished.context; __entry->seqno = job->base.s_fence->finished.seqno; - __assign_str(ring); + __amdkcl_assign_str(ring, to_amdgpu_ring(job->base.sched)->name); __entry->num_ibs = job->num_ibs; ), TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -231,7 +231,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->pasid = vm->pasid; - __assign_str(ring); + __amdkcl_assign_str(ring, ring->name); __entry->vmid = job->vmid; __entry->vm_hub = ring->vm_hub, __entry->pd_addr = job->vm_pd_addr; @@ -425,7 +425,7 @@ TRACE_EVENT(amdgpu_vm_flush, ), TP_fast_assign( - __assign_str(ring); + __amdkcl_assign_str(ring, ring->name); __entry->vmid = vmid; __entry->vm_hub = ring->vm_hub; __entry->pd_addr = pd_addr; @@ -526,7 +526,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, ), TP_fast_assign( - __assign_str(ring); + __amdkcl_assign_str(ring, sched_job->base.sched->name); __entry->id = sched_job->base.id; __entry->fence = fence; __entry->ctx = fence->context; @@ -559,5 +559,5 @@ 
TRACE_EVENT(amdgpu_reset_reg_dumps, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/amd/amdgpu +#define TRACE_INCLUDE_PATH . #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b8bc7fa8c3750..7afad16b5a3a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -32,6 +32,9 @@ #include #include +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED +#include +#endif #include #include #include @@ -127,6 +130,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, return; case TTM_PL_VRAM: + case AMDGPU_PL_DGMA: if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); @@ -153,6 +157,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } break; case TTM_PL_TT: + case AMDGPU_PL_DGMA_IMPORT: case AMDGPU_PL_PREEMPT: default: amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); @@ -161,6 +166,27 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, *placement = abo->placement; } +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +/** + * amdgpu_verify_access - Verify access for a mmap call + * + * @bo: The buffer object to map + * @filp: The file pointer from the process performing the mmap + * + * This is called by ttm_bo_mmap() to verify whether a process + * has the right to mmap a BO to their process space. + */ +static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) + return -EPERM; + return drm_vma_node_verify_access(&abo->tbo.base.vma_node, + filp->private_data); +} +#endif + /** * amdgpu_ttm_map_buffer - Map memory into the GART windows * @bo: buffer object to map @@ -391,7 +417,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->size, amdgpu_bo_encrypted(abo), - bo->base.resv, &fence); + amdkcl_ttm_resvp(bo), &fence); if (r) goto error; @@ -500,6 +526,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, } abo = ttm_to_amdgpu_bo(bo); + adev = amdgpu_ttm_adev(bo->bdev); if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && @@ -508,6 +535,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, ttm_bo_move_null(bo, new_mem); return 0; } + if (old_mem->mem_type == AMDGPU_PL_DGMA || + old_mem->mem_type == AMDGPU_PL_DGMA_IMPORT) + return -EINVAL; + if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { @@ -608,7 +639,10 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, case AMDGPU_PL_PREEMPT: break; case TTM_PL_VRAM: - mem->bus.offset = mem->start << PAGE_SHIFT; + case AMDGPU_PL_DGMA: + mem->bus.offset = (mem->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, mem->mem_type) - + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) @@ -624,6 +658,17 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.is_iomem = true; mem->bus.caching = ttm_uncached; break; + case AMDGPU_PL_DGMA_IMPORT: + { + struct amdgpu_bo *abo; + + abo = ttm_to_amdgpu_bo(mem->bo); + mem->bus.addr = abo->dgma_addr; + mem->bus.offset = abo->dgma_import_base; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; + break; + } default: 
return -EINVAL; } @@ -636,6 +681,10 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_res_cursor cursor; + if (bo->resource->mem_type == AMDGPU_PL_DGMA || + bo->resource->mem_type == AMDGPU_PL_DGMA_IMPORT) + return (bo->resource->bus.offset >> PAGE_SHIFT) + page_offset; + amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); @@ -661,6 +710,12 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type) return adev->gmc.gart_start; case TTM_PL_VRAM: return adev->gmc.vram_start; + case AMDGPU_PL_DGMA: + if (adev->direct_gma.dgma_bo) + return amdgpu_bo_gpu_offset(adev->direct_gma.dgma_bo); + fallthrough; + case AMDGPU_PL_DGMA_IMPORT: + return 0; } return 0; @@ -669,6 +724,13 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type) /* * TTM backend functions. */ +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED +struct amdgpu_ttm_gup_task_list { + struct list_head list; + struct task_struct *task; +}; +#endif + struct amdgpu_ttm_tt { struct ttm_tt ttm; struct drm_gem_object *gobj; @@ -677,12 +739,19 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED + spinlock_t guptasklock; + struct list_head guptasks; + atomic_t mmu_invalidations; + uint32_t last_set_pages; +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ int32_t pool_id; }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) #ifdef CONFIG_DRM_AMDGPU_USERPTR +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update @@ -766,12 +835,100 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); - WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); +#ifndef HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT + WARN_ONCE(!range->pfns, +#else + WARN_ONCE(!range->hmm_pfns, +#endif + "No user pages to check\n"); return !amdgpu_hmm_range_get_pages_done(range); } -#endif +#else +/* + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR + * pointer to memory + * + * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). + * This provides a wrapper around the get_user_pages() call to provide + * device accessible pages that back user memory. 
+ */ +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range) +{ + struct ttm_tt *ttm = bo->tbo.ttm; + struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; + unsigned int flags = 0; + unsigned pinned = 0; + int r; + + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + flags |= FOLL_WRITE; + + down_read(&mm->mmap_sem); + + if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { + /* + * check that we only use anonymous memory to prevent problems + * with writeback + */ + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma; + + vma = find_vma(mm, gtt->userptr); + if (!vma || vma->vm_file || vma->vm_end < end) { + up_read(&mm->mmap_sem); + return -EPERM; + } + } + + /* loop enough times using contiguous pages of memory */ + do { + unsigned num_pages = ttm->num_pages - pinned; + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; + struct page **p = pages + pinned; + struct amdgpu_ttm_gup_task_list guptask; + + guptask.task = current; + spin_lock(&gtt->guptasklock); + list_add(&guptask.list, &gtt->guptasks); + spin_unlock(&gtt->guptasklock); + + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = kcl_get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); + + spin_lock(&gtt->guptasklock); + list_del(&guptask.list); + spin_unlock(&gtt->guptasklock); + + if (r < 0) + goto release_pages; + + pinned += r; + + } while (pinned < ttm->num_pages); + + up_read(&mm->mmap_sem); + return 0; + +release_pages: + release_pages(pages, pinned); + up_read(&mm->mmap_sem); + return r; +} +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ +#endif /* CONFIG_DRM_AMDGPU_USERPTR */ + +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * @@ -787,6 +944,52 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) ttm->pages[i] = pages ? pages[i] : NULL; } +#else +/** + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. + */ +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); + for (i = 0; i < ttm->num_pages; ++i) { + if (ttm->pages[i]) + put_page(ttm->pages[i]); + + ttm->pages[i] = pages ? 
pages[i] : NULL; + } +} + +/** + * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty + * + * Called while unpinning userptr pages + */ +void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + for (i = 0; i < ttm->num_pages; ++i) { + struct page *page = ttm->pages[i]; + + if (!page) + continue; + + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + set_page_dirty(page); + + mark_page_accessed(page); + } +} +#endif + /* * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages * @@ -844,7 +1047,15 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, /* unmap the pages mapped to the device */ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); + +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED + sg_free_table(ttm->sg); +#else + /* mark the pages as dirty */ + amdgpu_ttm_tt_mark_user_pages(ttm); + sg_free_table(ttm->sg); +#endif } /* @@ -932,6 +1143,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, return r; } } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) { +#if defined(HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING) || \ + defined(HAVE_STRUCT_DMA_BUF_OPS_PIN) if (!ttm->sg) { struct dma_buf_attachment *attach; struct sg_table *sgt; @@ -943,6 +1156,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, ttm->sg = sgt; } +#endif drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address, ttm->num_pages); @@ -1055,11 +1269,14 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, if (gtt->userptr) { amdgpu_ttm_tt_unpin_userptr(bdev, ttm); } else if (ttm->sg && gtt->gobj->import_attach) { +#if defined(HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING) || \ + defined(HAVE_STRUCT_DMA_BUF_OPS_PIN) struct dma_buf_attachment *attach; attach = gtt->gobj->import_attach; dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL); ttm->sg = NULL; +#endif } if (!gtt->bound) @@ -1260,6 +1477,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); +#ifndef HAVE_AMDKCL_HMM_MIRROR_ENABLED + spin_lock_init(&gtt->guptasklock); + INIT_LIST_HEAD(&gtt->guptasks); + atomic_set(&gtt->mmu_invalidations, 0); + gtt->last_set_pages = 0; +#endif + + return 0; } @@ -1279,6 +1503,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) return gtt->usertask->mm; } +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an * address range for the current task. @@ -1318,6 +1543,77 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) return true; } +#else +/* + * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an + * address range for the current task. + * + */ +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end, unsigned long *userptr) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_gup_task_list *entry; + unsigned long size; + + if (gtt == NULL || !gtt->userptr) + return false; + + /* Return false if no part of the ttm_tt object lies within + * the range + */ + size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE; + if (gtt->userptr > end || gtt->userptr + size <= start) + return false; + + /* Search the lists of tasks that hold this mapping and see + * if current is one of them. If it is return false. 
+ */ + spin_lock(&gtt->guptasklock); + list_for_each_entry(entry, &gtt->guptasks, list) { + if (entry->task == current) { + spin_unlock(&gtt->guptasklock); + return false; + } + } + spin_unlock(&gtt->guptasklock); + + atomic_inc(&gtt->mmu_invalidations); + + if (userptr) + *userptr = gtt->userptr; + + return true; +} + +/** + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? + */ +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int prev_invalidated = *last_invalidated; + + *last_invalidated = atomic_read(&gtt->mmu_invalidations); + return prev_invalidated != *last_invalidated; +} + +/** + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object + * been invalidated since the last time they've been set? + */ +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL || !gtt->userptr) + return false; + + return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; +} +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ + /* * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? */ @@ -1376,6 +1672,9 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, { uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem); + if (mem && mem->mem_type == AMDGPU_PL_DGMA_IMPORT) + flags |= AMDGPU_PTE_SYSTEM; + flags |= adev->gart.gart_pte_flags; flags |= AMDGPU_PTE_READABLE; @@ -1415,7 +1714,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + dma_resv_for_each_fence(&resv_cursor, amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm) && !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) @@ -1597,6 +1896,9 @@ static struct ttm_device_funcs amdgpu_bo_driver = { .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, .evict_flags = &amdgpu_evict_flags, .move = &amdgpu_bo_move, +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK + .verify_access = &amdgpu_verify_access, +#endif .delete_mem_notify = &amdgpu_bo_delete_mem_notify, .release_notify = &amdgpu_bo_release_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, @@ -1798,6 +2100,93 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) return 0; } +static int amdgpu_direct_gma_init(struct amdgpu_device *adev) +{ + struct amdgpu_bo *abo; + struct amdgpu_bo_param bp; + unsigned long size; + int r; + + if (amdgpu_direct_gma_size == 0) + return 0; + + size = (unsigned long)amdgpu_direct_gma_size << 20; + + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_TOP_DOWN; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + /* reserve in visible vram */ + r = amdgpu_bo_create(adev, &bp, &abo); + if (unlikely(r)) + goto error_out; + + r = amdgpu_bo_reserve(abo, false); + if (unlikely(r)) + goto error_free; + + r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); + amdgpu_bo_unreserve(abo); + if (unlikely(r)) + goto error_free; + + adev->direct_gma.dgma_bo = abo; + + /* reserve in gtt */ + atomic64_add(size, &adev->gart_pin_size); + r = ttm_range_man_init(&adev->mman.bdev, AMDGPU_PL_DGMA, + false, size >> PAGE_SHIFT); + 
if (unlikely(r)) + goto error_put_node; + + r = ttm_range_man_init(&adev->mman.bdev, AMDGPU_PL_DGMA_IMPORT, + false, size >> PAGE_SHIFT); + if (unlikely(r)) + goto error_release_mm; + + DRM_INFO("%dMB VRAM/GTT reserved for Direct GMA\n", amdgpu_direct_gma_size); + return 0; + +error_release_mm: + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DGMA); + +error_put_node: + atomic64_sub(size, &adev->gart_pin_size); +error_free: + amdgpu_bo_unref(&abo); + +error_out: + amdgpu_direct_gma_size = 0; + memset(&adev->direct_gma, 0, sizeof(adev->direct_gma)); + DRM_ERROR("Failed to enable Direct GMA\n"); + return r; +} + +static void amdgpu_direct_gma_fini(struct amdgpu_device *adev) +{ + int r; + + if (amdgpu_direct_gma_size == 0) + return; + + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DGMA_IMPORT); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DGMA); + + r = amdgpu_bo_reserve(adev->direct_gma.dgma_bo, false); + if (r == 0) { + amdgpu_bo_unpin(adev->direct_gma.dgma_bo); + amdgpu_bo_unreserve(adev->direct_gma.dgma_bo); + } + amdgpu_bo_unref(&adev->direct_gma.dgma_bo); + atomic64_sub((u64)amdgpu_direct_gma_size << 20, &adev->gart_pin_size); +} + static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) { int i; @@ -1846,6 +2235,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; int r; + bool need_dma32; + +#ifdef AMDKCL_DMA_ADDRESSING_LIMITED_WORKAROUND + /* + * set DMA mask + need_dma32 flags. + * PCIE - can handle 44-bits. + * IGP - can handle 44-bits + * PCI - dma32 for legacy pci gart + */ + need_dma32 = !!pci_set_dma_mask(adev->pdev, dma_get_mask(adev->dev)); +#else + need_dma32 = dma_addressing_limited(adev->dev); +#endif mutex_init(&adev->mman.gtt_window_lock); @@ -1854,7 +2256,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev_to_drm(adev)->anon_inode->i_mapping, adev_to_drm(adev)->vma_offset_manager, adev->need_swiotlb, - dma_addressing_limited(adev->dev)); + need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } @@ -1960,6 +2362,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; else gtt_size = (uint64_t)amdgpu_gtt_size << 20; + /* reserve for DGMA import domain */ + gtt_size -= (uint64_t)amdgpu_direct_gma_size << 20; /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); @@ -1970,7 +2374,8 @@ DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); - /* Initiailize doorbell pool on PCI BAR */ + amdgpu_direct_gma_init(adev); + /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { DRM_ERROR("Failed initializing doorbell heap.\n"); @@ -2056,6 +2461,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) drm_dev_exit(idx); } + drain_workqueue(adev->mman.bdev.wq); + amdgpu_direct_gma_fini(adev); amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); @@ -2156,13 +2563,69 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, adev->gart.bo); (*job)->vm_needs_flush = true; } +#ifndef HAVE_STRUCT_XARRAY + return 0; +#else if (!resv) return 0; return drm_sched_job_add_resv_dependencies(&(*job)->base, resv, DMA_RESV_USAGE_BOOKKEEP); +#endif } +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +#ifndef HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG +static vm_fault_t amdgpu_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct ttm_buffer_object 
*bo = vma->vm_private_data; +#else +static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf) +{ + struct ttm_buffer_object *bo = vmf->vma->vm_private_data; +#endif + vm_fault_t ret; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + ret = amdgpu_bo_fault_reserve_notify(bo); + if (ret) + goto unlock; + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +unlock: + dma_resv_unlock(amdkcl_ttm_resvp(bo)); + return ret; +} + +static struct vm_operations_struct amdgpu_ttm_vm_ops = { + .fault = amdgpu_ttm_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access +}; + +int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev); + int r; + + r = ttm_bo_mmap(filp, vma, &adev->mman.bdev); + if (unlikely(r != 0)) + return r; + + vma->vm_ops = &amdgpu_ttm_vm_ops; + return 0; +} +#endif /* HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK */ + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, @@ -2399,7 +2862,6 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) } #if defined(CONFIG_DEBUG_FS) - static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = m->private; @@ -2407,7 +2869,31 @@ static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } +static int amdgpu_mm_dgma_table_show(struct seq_file *m, void *unused) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_DGMA); + struct drm_printer p = drm_seq_file_printer(m); + + man->func->debug(man, &p); + return 0; +} + +static int amdgpu_mm_dgma_import_table_show(struct seq_file *m, void *unused) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_DGMA_IMPORT); + struct drm_printer p = drm_seq_file_printer(m); + + man->func->debug(man, &p); + return 0; +} + DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); +DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_dgma_table); +DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_dgma_import_table); /* * amdgpu_ttm_vram_read - Linear read access to VRAM @@ -2618,6 +3104,7 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) &amdgpu_ttm_iomem_fops); debugfs_create_file("ttm_page_pool", 0444, root, adev, &amdgpu_ttm_page_pool_fops); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), root, "amdgpu_vram_mm"); @@ -2634,5 +3121,11 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) AMDGPU_PL_OA), root, "amdgpu_oa_mm"); + if (amdgpu_direct_gma_size) { + debugfs_create_file("amdgpu_dgma_mm", 0444, root, adev, + &amdgpu_mm_dgma_table_fops); + debugfs_create_file("amdgpu_dgma_import_mm", 0444, root, adev, + &amdgpu_mm_dgma_import_table_fops); + } #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 138d80017f356..acd83977731f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -34,6 +34,8 @@ #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) #define AMDGPU_PL_DOORBELL 
(TTM_PL_PRIV + 4) +#define AMDGPU_PL_DGMA (TTM_PL_PRIV + 7) +#define AMDGPU_PL_DGMA_IMPORT (TTM_PL_PRIV + 8) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 @@ -49,6 +51,13 @@ struct amdgpu_gtt_mgr { spinlock_t lock; }; +struct amdgpu_dgma_import_mgr { + struct ttm_resource_manager manager; + struct drm_mm mm; + spinlock_t lock; + atomic64_t available; +}; + struct amdgpu_mman { struct ttm_device bdev; struct ttm_pool *ttm_pools; @@ -68,6 +77,7 @@ struct amdgpu_mman { struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; + struct amdgpu_dgma_import_mgr dgma_import_mgr; struct ttm_resource_manager preempt_mgr; uint64_t stolen_vga_size; @@ -179,6 +189,10 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_fence **fence, bool delayed); +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); +#endif + int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); @@ -217,9 +231,14 @@ bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end, unsigned long *userptr); +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); +#else bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); +void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +#endif bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 5bc37acd39819..4e23419b92d4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 { struct psp_fw_bin_desc psp_fw_bin[]; }; +/* version_major=2, version_minor=1 */ +struct psp_firmware_header_v2_1 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + uint32_t psp_aux_fw_bin_index; + struct psp_fw_bin_desc psp_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +434,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct psp_firmware_header_v2_0 psp_v2_0; + struct psp_firmware_header_v2_1 psp_v2_1; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -447,7 +456,7 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; -#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2) /* * fw loading support diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index bb7b9b2eaac1a..b9fec295b9a67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -247,12 +247,16 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int ret; +#ifdef HAVE_KFIFO_PUT_NON_POINTER ret = amdgpu_ras_put_poison_req(adev, block, pasid, pasid_fn, data, reset); if (!ret) { +#endif atomic_inc(&con->page_retirement_req_cnt); wake_up(&con->page_retirement_wq); +#ifdef HAVE_KFIFO_PUT_NON_POINTER } +#endif } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) @@ -318,6 +322,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (r) return r; + if (amdgpu_sriov_vf(adev)) + return r; + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 6162582d0aa27..3ef75f1a47f65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -765,9 +765,9 @@ static int umsch_mm_init(struct amdgpu_device *adev) } -static int umsch_mm_early_init(void *handle) +static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): @@ -784,9 +784,9 @@ static int umsch_mm_early_init(void *handle) return 0; } -static int umsch_mm_late_init(void *handle) +static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) return 0; @@ -794,9 +794,9 @@ static int umsch_mm_late_init(void *handle) return umsch_mm_test(adev); } -static int umsch_mm_sw_init(void *handle) +static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = umsch_mm_init(adev); @@ -815,9 +815,9 @@ static int umsch_mm_sw_init(void *handle) return 0; } -static int umsch_mm_sw_fini(void *handle) +static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; @@ -839,9 +839,9 @@ static int umsch_mm_sw_fini(void *handle) return 0; } -static int umsch_mm_hw_init(void *handle) +static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = umsch_mm_load_microcode(&adev->umsch_mm); @@ -857,9 +857,9 @@ static int umsch_mm_hw_init(void *handle) return 0; } -static int umsch_mm_hw_fini(void *handle) +static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; umsch_mm_ring_stop(&adev->umsch_mm); @@ -873,18 +873,14 @@ static int umsch_mm_hw_fini(void *handle) return 0; } -static int umsch_mm_suspend(void *handle) +static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return umsch_mm_hw_fini(adev); + return umsch_mm_hw_fini(ip_block); } -static int umsch_mm_resume(void *handle) +static int umsch_mm_resume(struct 
amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return umsch_mm_hw_init(adev); + return umsch_mm_hw_init(ip_block); } void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 31fd30dcd593b..cc536b13f4e7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1167,11 +1167,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; } else { +#ifdef HAVE_STRUCT_XARRAY r = drm_sched_job_add_resv_dependencies(&job->base, - bo->tbo.base.resv, + amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_KERNEL); if (r) goto err_free; +#endif f = amdgpu_job_submit(job); } @@ -1311,7 +1313,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) - schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); + mod_delayed_work(system_wq, &ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 74fdbf71d95b7..9fe93826c23f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -395,7 +395,7 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) - schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); + mod_delayed_work(system_wq, &ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 43f44cc201cb8..aecb78e0519f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -294,21 +294,12 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t return ret; } -int amdgpu_vcn_suspend(struct amdgpu_device *adev) +int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) { unsigned int size; void *ptr; int i, idx; - bool in_ras_intr = amdgpu_ras_intr_triggered(); - - cancel_delayed_work_sync(&adev->vcn.idle_work); - - /* err_event_athub will corrupt VCPU buffer, so we need to - * restore fw data and clear buffer in amdgpu_vcn_resume() */ - if (in_ras_intr) - return 0; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; @@ -327,9 +318,24 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) drm_dev_exit(idx); } } + return 0; } +int amdgpu_vcn_suspend(struct amdgpu_device *adev) +{ + bool in_ras_intr = amdgpu_ras_intr_triggered(); + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + /* err_event_athub will corrupt VCPU buffer, so we need to + * restore fw data and clear buffer in amdgpu_vcn_resume() */ + if (in_ras_intr) + return 0; + + return amdgpu_vcn_save_vcpu_bo(adev); +} + int amdgpu_vcn_resume(struct amdgpu_device *adev) { unsigned int size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c87d68d4be536..765b809d48a25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -515,5 +518,6 @@ int 
amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, enum AMDGPU_UCODE_ID ucode_id); +int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b6397d3229e1b..c704e9803e110 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -523,6 +523,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->unique_id = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; + adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all; + adev->virt.ras_telemetry_en_caps.all = + ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all; break; default: dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version); @@ -703,6 +706,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + adev->virt.fw_reserve.ras_telemetry = + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10)); } else if (adev->mman.drv_vram_usage_va) { adev->virt.fw_reserve.p_pf2vf = (struct amd_sriov_msg_pf2vf_info_header *) @@ -710,6 +715,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_vf2pf = (struct amd_sriov_msg_vf2pf_info_header *) (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + adev->virt.fw_reserve.ras_telemetry = + (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10)); } amdgpu_virt_read_pf2vf_data(adev); @@ -1144,3 +1151,185 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) return xnack_mode; } + +bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!amdgpu_sriov_ras_caps_en(adev)) + return false; + + if (adev->virt.ras_en_caps.bits.block_umc) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC); + if (adev->virt.ras_en_caps.bits.block_sdma) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA); + if (adev->virt.ras_en_caps.bits.block_gfx) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX); + if (adev->virt.ras_en_caps.bits.block_mmhub) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB); + if (adev->virt.ras_en_caps.bits.block_athub) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB); + if (adev->virt.ras_en_caps.bits.block_pcie_bif) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF); + if (adev->virt.ras_en_caps.bits.block_hdp) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP); + if (adev->virt.ras_en_caps.bits.block_xgmi_wafl) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL); + if (adev->virt.ras_en_caps.bits.block_df) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF); + if (adev->virt.ras_en_caps.bits.block_smn) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN); + if (adev->virt.ras_en_caps.bits.block_sem) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM); + if (adev->virt.ras_en_caps.bits.block_mp0) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0); + if (adev->virt.ras_en_caps.bits.block_mp1) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1); + if (adev->virt.ras_en_caps.bits.block_fuse) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE); + if (adev->virt.ras_en_caps.bits.block_mca) + adev->ras_hw_enabled |= 
BIT(AMDGPU_RAS_BLOCK__MCA); + if (adev->virt.ras_en_caps.bits.block_vcn) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN); + if (adev->virt.ras_en_caps.bits.block_jpeg) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG); + if (adev->virt.ras_en_caps.bits.block_ih) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH); + if (adev->virt.ras_en_caps.bits.block_mpio) + adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO); + + if (adev->virt.ras_en_caps.bits.poison_propogation_mode) + con->poison_supported = true; /* Poison is handled by host */ + + return true; +} + +static inline enum amd_sriov_ras_telemetry_gpu_block +amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) { + switch (block) { + case AMDGPU_RAS_BLOCK__UMC: + return RAS_TELEMETRY_GPU_BLOCK_UMC; + case AMDGPU_RAS_BLOCK__SDMA: + return RAS_TELEMETRY_GPU_BLOCK_SDMA; + case AMDGPU_RAS_BLOCK__GFX: + return RAS_TELEMETRY_GPU_BLOCK_GFX; + case AMDGPU_RAS_BLOCK__MMHUB: + return RAS_TELEMETRY_GPU_BLOCK_MMHUB; + case AMDGPU_RAS_BLOCK__ATHUB: + return RAS_TELEMETRY_GPU_BLOCK_ATHUB; + case AMDGPU_RAS_BLOCK__PCIE_BIF: + return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF; + case AMDGPU_RAS_BLOCK__HDP: + return RAS_TELEMETRY_GPU_BLOCK_HDP; + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL; + case AMDGPU_RAS_BLOCK__DF: + return RAS_TELEMETRY_GPU_BLOCK_DF; + case AMDGPU_RAS_BLOCK__SMN: + return RAS_TELEMETRY_GPU_BLOCK_SMN; + case AMDGPU_RAS_BLOCK__SEM: + return RAS_TELEMETRY_GPU_BLOCK_SEM; + case AMDGPU_RAS_BLOCK__MP0: + return RAS_TELEMETRY_GPU_BLOCK_MP0; + case AMDGPU_RAS_BLOCK__MP1: + return RAS_TELEMETRY_GPU_BLOCK_MP1; + case AMDGPU_RAS_BLOCK__FUSE: + return RAS_TELEMETRY_GPU_BLOCK_FUSE; + case AMDGPU_RAS_BLOCK__MCA: + return RAS_TELEMETRY_GPU_BLOCK_MCA; + case AMDGPU_RAS_BLOCK__VCN: + return RAS_TELEMETRY_GPU_BLOCK_VCN; + case AMDGPU_RAS_BLOCK__JPEG: + return RAS_TELEMETRY_GPU_BLOCK_JPEG; + case AMDGPU_RAS_BLOCK__IH: + return RAS_TELEMETRY_GPU_BLOCK_IH; + case AMDGPU_RAS_BLOCK__MPIO: + return RAS_TELEMETRY_GPU_BLOCK_MPIO; + default: + dev_err(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n", block); + return RAS_TELEMETRY_GPU_BLOCK_COUNT; + } +} + +static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev, + struct amdsriov_ras_telemetry *host_telemetry) +{ + struct amd_sriov_ras_telemetry_error_count *tmp = NULL; + uint32_t checksum, used_size; + + checksum = host_telemetry->header.checksum; + used_size = host_telemetry->header.used_size; + + if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + return 0; + + tmp = kmalloc(used_size, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + memcpy(tmp, &host_telemetry->body.error_count, used_size); + + if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) + goto out; + + memcpy(&adev->virt.count_cache, tmp, + min(used_size, sizeof(adev->virt.count_cache))); +out: + kfree(tmp); + + return 0; +} + +static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update) +{ + struct amdgpu_virt *virt = &adev->virt; + + /* The host allows 15 ras telemetry requests per 60 seconds. After that, the Host + * will ignore incoming guest messages. Rate-limit the guest messages to + * prevent guest self-DoS. 
+ */ + if (__ratelimit(&adev->virt.ras_telemetry_rs) || force_update) { + if (!virt->ops->req_ras_err_count(adev)) + amdgpu_virt_cache_host_error_counts(adev, + adev->virt.fw_reserve.ras_telemetry); + } + + return 0; +} + +/* Bypass ACA interface and query ECC counts directly from host */ +int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, + struct ras_err_data *err_data) +{ + enum amd_sriov_ras_telemetry_gpu_block sriov_block; + + sriov_block = amdgpu_ras_block_to_sriov(adev, block); + + if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT || + !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block)) + return -EOPNOTSUPP; + + /* Host Access may be lost during reset, just return last cached data. */ + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_req_ras_err_count_internal(adev, false); + up_read(&adev->reset_domain->sem); + } + + err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count; + err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count; + err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count; + + return 0; +} + +int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev) +{ + unsigned long ue_count, ce_count; + + if (amdgpu_sriov_ras_telemetry_en(adev)) { + amdgpu_virt_req_ras_err_count_internal(adev, true); + amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b650a2032c42b..5381b8d596e62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -95,6 +95,7 @@ struct amdgpu_virt_ops { void (*ras_poison_handler)(struct amdgpu_device *adev, enum amdgpu_ras_block block); bool (*rcvd_ras_intr)(struct amdgpu_device *adev); + int (*req_ras_err_count)(struct amdgpu_device *adev); }; /* @@ -103,6 +104,7 @@ struct amdgpu_virt_ops { struct amdgpu_virt_fw_reserve { struct amd_sriov_msg_pf2vf_info_header *p_pf2vf; struct amd_sriov_msg_vf2pf_info_header *p_vf2pf; + void *ras_telemetry; unsigned int checksum_key; }; @@ -136,6 +138,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), /* MES info */ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), + AMDGIM_FEATURE_RAS_CAPS = (1 << 9), + AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -276,6 +280,12 @@ struct amdgpu_virt { uint32_t autoload_ucode_id; struct mutex rlcg_reg_lock; + + union amd_sriov_ras_caps ras_en_caps; + union amd_sriov_ras_caps ras_telemetry_en_caps; + + struct ratelimit_state ras_telemetry_rs; + struct amd_sriov_ras_telemetry_error_count count_cache; }; struct amdgpu_video_codec_info; @@ -320,6 +330,15 @@ struct amdgpu_video_codec_info; #define amdgpu_sriov_vf_mmio_access_protection(adev) \ ((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT) +#define amdgpu_sriov_ras_caps_en(adev) \ +((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS) + +#define amdgpu_sriov_ras_telemetry_en(adev) \ +(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry) + +#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \ +(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk)) + static inline bool is_virtual_machine(void) { #if defined(CONFIG_X86) @@ -383,4 +402,8 @@ bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag); u32 amdgpu_virt_rlcg_reg_rw(struct 
amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); +bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev); +int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, + struct ras_err_data *err_data); +int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index e5f508d34ed83..f5a24d70ccd5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -100,10 +100,8 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, } *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires); - - if (WARN_ON(*vblank_time == vblank->time)) + if (WARN_ON(ktime_to_us(*vblank_time) == ktime_to_us(vblank->time))) return true; - /* * To prevent races we roll the hrtimer forward before we do any * interrupt processing - this is how real hw works (the interrupt is @@ -111,8 +109,8 @@ static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, * the vblank core expects. Therefore we need to always correct the * timestampe by one frame. */ - *vblank_time -= output->period_ns; + *vblank_time = ktime_sub(*vblank_time, output->period_ns); return true; } @@ -123,25 +121,39 @@ static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .enable_vblank = amdgpu_vkms_enable_vblank, .disable_vblank = amdgpu_vkms_disable_vblank, .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp, +#endif }; static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc, +#if defined(HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE_ARG_DRM_ATOMIC_STATE) struct drm_atomic_state *state) +#else + struct drm_crtc_state *state) +#endif { drm_crtc_vblank_on(crtc); } static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc, - struct drm_atomic_state *state) +#if defined(HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE_ARG_DRM_ATOMIC_STATE) + struct drm_atomic_state *state) +#else + struct drm_crtc_state *state) +#endif { drm_crtc_vblank_off(crtc); } static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc, - struct drm_atomic_state *state) +#if defined(HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + struct drm_atomic_state *state) +#else + struct drm_crtc_state *state) +#endif { unsigned long flags; if (crtc->state->event) { @@ -158,6 +170,7 @@ static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc, } } + static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = { .atomic_flush = amdgpu_vkms_crtc_atomic_flush, .atomic_enable = amdgpu_vkms_crtc_atomic_enable, @@ -194,12 +207,33 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, return ret; } +#ifdef AMDKCL_DRM_CONNECTOR_FUNCS_DPMS_MANDATORY +static int +amdgpu_vkms_connector_dpms(struct drm_connector *connector, int mode) +{ + return 0; +} + + +static int +amdgpu_vkms_connector_set_property(struct drm_connector *connector, + struct drm_property *property, + uint64_t val) +{ + return 0; +} +#endif + static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = 
drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +#ifdef AMDKCL_DRM_CONNECTOR_FUNCS_DPMS_MANDATORY + .set_property = amdgpu_vkms_connector_set_property, + .dpms = amdgpu_vkms_connector_dpms, +#endif }; static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector) @@ -261,16 +295,26 @@ static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = { }; static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane, +#if defined(HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS) struct drm_atomic_state *old_state) +#else + struct drm_plane_state *old_state) +#endif { return; } static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane, +#if defined(HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS) struct drm_atomic_state *state) { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); +#else + struct drm_plane_state *new_plane_state) +{ + struct drm_atomic_state *state = new_plane_state->state; +#endif struct drm_crtc_state *crtc_state; int ret; @@ -279,6 +323,7 @@ static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane, crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); + if (IS_ERR(crtc_state)) return PTR_ERR(crtc_state); @@ -327,7 +372,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, return r; } - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&rbo->tbo), 1); if (r) { dev_err(adev->dev, "allocating fence slot failed (%d)\n", r); goto error_unlock; @@ -338,6 +383,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) @@ -486,16 +532,115 @@ static int amdgpu_vkms_output_init(struct drm_device *dev, struct return ret; } +#ifndef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP +static u32 amdgpu_vkms_vblank_get_counter(struct amdgpu_device *adev, int crtc) +{ + return 0; +} + +static void amdgpu_vkms_page_flip(struct amdgpu_device *adev, + int crtc_id, u64 crtc_base, bool async) +{ + return; +} + +static int amdgpu_vkms_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, + u32 *vbl, u32 *position) +{ + *vbl = 0; + *position = 0; + + return -EINVAL; +} + +static bool amdgpu_vkms_hpd_sense(struct amdgpu_device *adev, + enum amdgpu_hpd_id hpd) +{ + return true; +} + +static void amdgpu_vkms_hpd_set_polarity(struct amdgpu_device *adev, + enum amdgpu_hpd_id hpd) +{ + return; +} + +static u32 amdgpu_vkms_hpd_get_gpio_reg(struct amdgpu_device *adev) +{ + return 0; +} + +static void amdgpu_vkms_bandwidth_update(struct amdgpu_device *adev) +{ + return; +} + +static const struct amdgpu_display_funcs amdgpu_vkms_display_funcs = { + .bandwidth_update = &amdgpu_vkms_bandwidth_update, + .vblank_get_counter = &amdgpu_vkms_vblank_get_counter, + .backlight_set_level = NULL, + .backlight_get_level = NULL, + .hpd_sense = &amdgpu_vkms_hpd_sense, + .hpd_set_polarity = &amdgpu_vkms_hpd_set_polarity, + .hpd_get_gpio_reg = &amdgpu_vkms_hpd_get_gpio_reg, + .page_flip = &amdgpu_vkms_page_flip, + .page_flip_get_scanoutpos = &amdgpu_vkms_crtc_get_scanoutpos, + .add_encoder = NULL, + .add_connector = NULL, +}; + +static int amdgpu_vkms_set_crtc_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + if (type > 
AMDGPU_CRTC_IRQ_VBLANK6) + return -EINVAL; + + if (type >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[type]) { + DRM_DEBUG("invalid crtc %d\n", type); + return -EINVAL; + } + + adev->mode_info.crtcs[type]->vsync_timer_enabled = state; + + if (state == AMDGPU_IRQ_STATE_ENABLE) + amdgpu_vkms_enable_vblank(&adev->mode_info.crtcs[type]->base); + else + amdgpu_vkms_disable_vblank(&adev->mode_info.crtcs[type]->base); + + return 0; +} + +static const struct amdgpu_irq_src_funcs amdgpu_vkms_crtc_irq_funcs = { + .set = amdgpu_vkms_set_crtc_irq_state, + .process = NULL, +}; + +static int amdgpu_vkms_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->crtc_irq.num_types = adev->mode_info.num_crtc; + adev->crtc_irq.funcs = &amdgpu_vkms_crtc_irq_funcs; + + adev->mode_info.funcs = &amdgpu_vkms_display_funcs; + adev->mode_info.num_hpd = 1; + adev->mode_info.num_dig = 1; + return 0; +} +#endif + const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; -static int amdgpu_vkms_sw_init(void *handle) +static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL); @@ -512,7 +657,9 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; +#endif r = amdgpu_display_modeset_create_props(adev); if (r) @@ -535,9 +682,9 @@ static int amdgpu_vkms_sw_init(void *handle) return 0; } -static int amdgpu_vkms_sw_fini(void *handle) +static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) @@ -554,9 +701,9 @@ static int amdgpu_vkms_sw_fini(void *handle) return 0; } -static int amdgpu_vkms_hw_init(void *handle) +static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI @@ -599,31 +746,30 @@ static int amdgpu_vkms_hw_init(void *handle) return 0; } -static int amdgpu_vkms_hw_fini(void *handle) +static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int amdgpu_vkms_suspend(void *handle) +static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = drm_mode_config_helper_suspend(adev_to_drm(adev)); if (r) return r; - return amdgpu_vkms_hw_fini(handle); + return amdgpu_vkms_hw_fini(ip_block); } -static int amdgpu_vkms_resume(void *handle) +static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_vkms_hw_init(handle); + r = amdgpu_vkms_hw_init(ip_block); if (r) return r; - return drm_mode_config_helper_resume(adev_to_drm(adev)); + return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev)); } 
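The vkms and vpe hunks in this patch repeat one mechanical conversion: IP-block callbacks stop taking an opaque void *handle and instead receive the struct amdgpu_ip_block that owns them. A minimal self-contained sketch of the pattern, using stand-in types rather than the driver's real structs:

#include <stdio.h>

/* Stand-in types; the driver's real pair is struct amdgpu_device and
 * struct amdgpu_ip_block. */
struct demo_device { int asic_type; };
struct demo_ip_block { struct demo_device *adev; };

/* Before: every callback took an opaque handle and cast it back. */
static int demo_hw_init_old(void *handle)
{
	struct demo_device *adev = (struct demo_device *)handle;

	return adev->asic_type;
}

/* After: the callback receives its IP block and reads ->adev, so the
 * cast disappears and the same argument can carry per-block state. */
static int demo_hw_init_new(struct demo_ip_block *ip_block)
{
	struct demo_device *adev = ip_block->adev;

	return adev->asic_type;
}

int main(void)
{
	struct demo_device dev = { .asic_type = 42 };
	struct demo_ip_block block = { .adev = &dev };

	printf("%d %d\n", demo_hw_init_old(&dev), demo_hw_init_new(&block));
	return 0;
}

The chained callbacks follow suit: amdgpu_vkms_suspend above now forwards its ip_block to amdgpu_vkms_hw_fini instead of re-passing a raw handle.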
static bool amdgpu_vkms_is_idle(void *handle) @@ -631,12 +777,12 @@ static bool amdgpu_vkms_is_idle(void *handle) return true; } -static int amdgpu_vkms_wait_for_idle(void *handle) +static int amdgpu_vkms_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int amdgpu_vkms_soft_reset(void *handle) +static int amdgpu_vkms_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } @@ -655,7 +801,9 @@ static int amdgpu_vkms_set_powergating_state(void *handle, static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .name = "amdgpu_vkms", - .early_init = NULL, +#ifndef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP + .early_init = amdgpu_vkms_early_init, +#endif .late_init = NULL, .sw_init = amdgpu_vkms_sw_init, .sw_fini = amdgpu_vkms_sw_fini, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcb729094521f..3d99ba92ab0b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -140,8 +140,9 @@ struct amdgpu_vm_tlb_seq_struct { int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) { - int r; + int r = 0; +#ifdef HAVE_STRUCT_XARRAY if (vm->pasid == pasid) return 0; @@ -161,7 +162,21 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } +#else + unsigned long flags; + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + if (pasid) { + r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, + GFP_ATOMIC); + } else if (vm->pasid) { + idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); + } + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + if (r < 0) + return r; + vm->pasid = pasid; +#endif return 0; } @@ -336,7 +351,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; - dma_resv_assert_held(vm->root.bo->tbo.base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(&vm->root.bo->tbo)); ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move); if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) @@ -465,7 +480,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, { uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; @@ -486,16 +500,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) return r; - if (shadow) { - r = validate(param, shadow); - if (r) - return r; - } if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -513,7 +521,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, bo = bo_base->bo; - if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { + if (dma_resv_locking_ctx(amdkcl_ttm_resvp(&bo->tbo)) != ticket) { struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); pr_warn_ratelimited("Evicted user BO is not reserved\n"); @@ -681,6 +689,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -742,6 +755,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, 
amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } @@ -838,7 +852,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); + r = vm->update_funcs->prepare(&params, NULL); if (r) goto error; @@ -902,10 +916,12 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, { struct amdgpu_vm *vm = params->vm; - if (!fence || !*fence) + tlb_cb->vm = vm; + if (!fence || !*fence) { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); return; + } - tlb_cb->vm = vm; if (!dma_fence_add_callback(*fence, &tlb_cb->cb, amdgpu_vm_tlb_seq_cb)) { dma_fence_put(vm->last_tlb_flush); @@ -919,7 +935,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, amdgpu_vm_tlb_fence_create(params->adev, vm, fence); /* Makes sure no PD/PT is freed before the flush */ - dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + dma_resv_add_fence(amdkcl_ttm_resvp(&vm->root.bo->tbo), *fence, DMA_RESV_USAGE_BOOKKEEP); } } @@ -933,7 +949,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed * @allow_override: change MTYPE for local NUMA nodes - * @resv: fences we need to sync to + * @sync: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries @@ -949,16 +965,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, * 0 for success, negative error code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, bool allow_override, - struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, uint64_t vram_base, + bool immediate, bool unlocked, bool flush_tlb, + bool allow_override, struct amdgpu_sync *sync, + uint64_t start, uint64_t last, uint64_t flags, + uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_tlb_seq_struct *tlb_cb; struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; - enum amdgpu_sync_mode sync_mode; int r, idx; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -991,14 +1007,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.allow_override = allow_override; INIT_LIST_HEAD(&params.tlb_flush_waitlist); - /* Implicitly sync to command submissions in the same VM before - * unmapping. Sync to moving fences before mapping. 
- */ - if (!(flags & AMDGPU_PTE_VALID)) - sync_mode = AMDGPU_SYNC_EQ_OWNER; - else - sync_mode = AMDGPU_SYNC_EXPLICIT; - amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1013,7 +1021,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, dma_fence_put(tmp); } - r = vm->update_funcs->prepare(&params, resv, sync_mode); + r = vm->update_funcs->prepare(&params, sync); if (r) goto error_free; @@ -1023,45 +1031,59 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t tmp, num_entries, addr; num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; - if (pages_addr) { - bool contiguous = true; - if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { - uint64_t pfn = cursor.start >> PAGE_SHIFT; - uint64_t count; + if (res && (res->mem_type == AMDGPU_PL_DGMA_IMPORT || + res->mem_type == AMDGPU_PL_DGMA)) { + uint64_t pfn = offset >> PAGE_SHIFT; + + if (res->mem_type == AMDGPU_PL_DGMA_IMPORT) { + addr = 0; + } else { + addr = pfn << PAGE_SHIFT; + addr += vram_base + + cursor.start + amdgpu_ttm_domain_start(adev, res->mem_type) - + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); + params.pages_addr = NULL; + } + } else { + if (pages_addr) { + bool contiguous = true; + + if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { + uint64_t pfn = cursor.start >> PAGE_SHIFT; + uint64_t count; - contiguous = pages_addr[pfn + 1] == - pages_addr[pfn] + PAGE_SIZE; + contiguous = pages_addr[pfn + 1] == + pages_addr[pfn] + PAGE_SIZE; - tmp = num_entries / - AMDGPU_GPU_PAGES_IN_CPU_PAGE; - for (count = 2; count < tmp; ++count) { - uint64_t idx = pfn + count; + tmp = num_entries / + AMDGPU_GPU_PAGES_IN_CPU_PAGE; + for (count = 2; count < tmp; ++count) { + uint64_t idx = pfn + count; if (contiguous != (pages_addr[idx] == - pages_addr[idx - 1] + PAGE_SIZE)) + pages_addr[idx - 1] + PAGE_SIZE)) break; - } - if (!contiguous) + } + if (!contiguous) count--; - num_entries = count * - AMDGPU_GPU_PAGES_IN_CPU_PAGE; - } + num_entries = count * + AMDGPU_GPU_PAGES_IN_CPU_PAGE; + } - if (!contiguous) { - addr = cursor.start; - params.pages_addr = pages_addr; + if (!contiguous) { + addr = cursor.start; + params.pages_addr = pages_addr; + } else { + addr = pages_addr[cursor.start >> PAGE_SHIFT]; + params.pages_addr = NULL; + } + } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { + addr = vram_base + cursor.start; } else { - addr = pages_addr[cursor.start >> PAGE_SHIFT]; - params.pages_addr = NULL; + addr = 0; } - - } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { - addr = vram_base + cursor.start; - } else { - addr = 0; } - tmp = start + num_entries; r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags); if (r) @@ -1103,12 +1125,12 @@ static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, * changing their location. 
*/ if (!amdgpu_vm_is_bo_always_valid(vm, bo) && - !dma_resv_trylock(bo->tbo.base.resv)) + !dma_resv_trylock(amdkcl_ttm_resvp(&bo->tbo))) return; amdgpu_bo_get_memory(bo, stats); if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - dma_resv_unlock(bo->tbo.base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(&bo->tbo)); } void amdgpu_vm_get_memory(struct amdgpu_vm *vm, @@ -1155,23 +1177,37 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; + struct dma_fence **last_update; dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; - struct dma_fence **last_update; + struct amdgpu_sync sync; bool flush_tlb = clear; - bool uncached; - struct dma_resv *resv; uint64_t vram_base; uint64_t flags; + bool uncached; int r; + struct amdgpu_device *bo_adev = adev; + amdgpu_sync_create(&sync); if (clear || !bo) { mem = NULL; - resv = vm->root.bo->tbo.base.resv; + + /* Implicitly sync to command submissions in the same VM before + * unmapping. + */ + r = amdgpu_sync_resv(adev, &sync, amdkcl_ttm_resvp(&vm->root.bo->tbo), + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + if (bo) { + r = amdgpu_sync_kfd(&sync, amdkcl_ttm_resvp(&bo->tbo)); + if (r) + goto error_free; + } + } else { struct drm_gem_object *obj = &bo->tbo.base; - resv = bo->tbo.base.resv; if (obj->import_attach && bo_va->is_xgmi) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; @@ -1185,11 +1221,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (mem && (mem->mem_type == TTM_PL_TT || mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; + else if (mem && mem->mem_type == AMDGPU_PL_DGMA_IMPORT) + pages_addr = (dma_addr_t *)bo->dgma_addr; + + /* Implicitly sync to moving fences before mapping anything */ + r = amdgpu_sync_resv(adev, &sync, amdkcl_ttm_resvp(&bo->tbo), + AMDGPU_SYNC_EXPLICIT, vm); + if (r) + goto error_free; } if (bo) { - struct amdgpu_device *bo_adev; - flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) @@ -1231,15 +1273,28 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, /* Apply ASIC specific mapping flags */ amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags); + if (adev != bo_adev && + !(update_flags & AMDGPU_PTE_SYSTEM) && + !mapping->bo_va->is_xgmi) { + if (amdgpu_device_is_peer_accessible(bo_adev, adev)) { + update_flags |= AMDGPU_PTE_SYSTEM; + vram_base = bo_adev->gmc.aper_base; + } else { + DRM_DEBUG_DRIVER("Failed to map the VRAM for peer device access.\n"); + r = -EINVAL; + goto error_free; + } + } + trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, - !uncached, resv, mapping->start, mapping->last, - update_flags, mapping->offset, - vram_base, mem, pages_addr, - last_update); + !uncached, &sync, mapping->start, + mapping->last, update_flags, + mapping->offset, vram_base, mem, + pages_addr, last_update); if (r) - return r; + goto error_free; } /* If the BO is not in its preferred location add it back to @@ -1267,7 +1321,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_mapping(mapping); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } /** @@ -1384,7 +1440,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm 
*vm) { - struct dma_resv *resv = vm->root.bo->tbo.base.resv; + struct dma_resv *resv = amdkcl_ttm_resvp(&vm->root.bo->tbo); struct dma_resv_iter cursor; struct dma_fence *fence; @@ -1414,25 +1470,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence) { - struct dma_resv *resv = vm->root.bo->tbo.base.resv; struct amdgpu_bo_va_mapping *mapping; - uint64_t init_pte_value = 0; struct dma_fence *f = NULL; + struct amdgpu_sync sync; int r; + + /* + * Implicitly sync to command submissions in the same VM before + * unmapping. + */ + amdgpu_sync_create(&sync); + r = amdgpu_sync_resv(adev, &sync, amdkcl_ttm_resvp(&vm->root.bo->tbo), + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); r = amdgpu_vm_update_range(adev, vm, false, false, true, false, - resv, mapping->start, mapping->last, - init_pte_value, 0, 0, NULL, NULL, - &f); + &sync, mapping->start, mapping->last, + 0, 0, 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); - return r; + goto error_free; } } @@ -1443,7 +1508,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, dma_fence_put(f); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } @@ -1486,7 +1553,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.base.resv; + resv = amdkcl_ttm_resvp(&bo_va->base.bo->tbo); spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ @@ -1606,7 +1673,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (!bo) return bo_va; - dma_resv_assert_held(bo->tbo.base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(&bo->tbo)); if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) { bo_va->is_xgmi = true; /* Power up XGMI if it can be potentially used */ @@ -2005,7 +2072,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) struct amdgpu_bo *bo; bo = mapping->bo_va->base.bo; - if (dma_resv_locking_ctx(bo->tbo.base.resv) != + if (dma_resv_locking_ctx(amdkcl_ttm_resvp(&bo->tbo)) != ticket) continue; } @@ -2032,10 +2099,10 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_vm_bo_base **base; - dma_resv_assert_held(vm->root.bo->tbo.base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(&vm->root.bo->tbo)); if (bo) { - dma_resv_assert_held(bo->tbo.base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(&bo->tbo)); if (amdgpu_vm_is_bo_always_valid(vm, bo)) ttm_bo_set_bulk_move(&bo->tbo, NULL); @@ -2091,7 +2158,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) + if (!dma_resv_test_signaled(amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_BOOKKEEP)) return false; /* Try to block ongoing updates */ @@ -2123,10 +2190,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base; - /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) - bo = bo->parent; - for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; @@ -2218,7 +2281,7 @@ void 
amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + (1 << 30) - 1) >> 30; vm_size = roundup_pow_of_two( - min(max(phys_ram_gb * 3, min_vm_size), max_size)); + clamp(phys_ram_gb * 3, min_vm_size, max_size)); } adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; @@ -2271,7 +2334,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + timeout = dma_resv_wait_timeout(amdkcl_ttm_resvp(&vm->root.bo->tbo), DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) @@ -2293,9 +2356,15 @@ amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) struct amdgpu_vm *vm; unsigned long flags; +#ifdef HAVE_STRUCT_XARRAY xa_lock_irqsave(&adev->vm_manager.pasids, flags); vm = xa_load(&adev->vm_manager.pasids, pasid); xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +#else + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); +#endif return vm; } @@ -2397,12 +2466,15 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { - struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + vm->va = RB_ROOT; +#else vm->va = RB_ROOT_CACHED; +#endif for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); @@ -2427,11 +2499,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); - /* use CPU for page table update if SDMA is unavailable */ - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); - if (!ip_block || ip_block->status.valid == false) - vm->use_cpu_for_update = true; - DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? 
"CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2460,13 +2527,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); if (r) { - amdgpu_bo_unref(&root->shadow); amdgpu_bo_unref(&root_bo); goto error_free_delayed; } amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&root_bo->tbo), 1); if (r) goto error_free_root; @@ -2552,11 +2618,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; - /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - - goto unreserve_bo; - unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; @@ -2626,11 +2687,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_fini_entities(vm); +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + if (!RB_EMPTY_ROOT(&vm->va)) { +#else if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { +#endif dev_err(adev->dev, "still active bo inside vm\n"); } rbtree_postorder_for_each_entry_safe(mapping, tmp, +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + &vm->va, rb) { +#else &vm->va.rb_root, rb) { +#endif /* Don't remove the mapping here, we don't want to trigger a * rebalance and the tree is about to be destroyed anyway. */ @@ -2696,7 +2765,12 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.vm_update_mode = 0; #endif +#ifdef HAVE_STRUCT_XARRAY xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ); +#else + idr_init(&adev->vm_manager.pasid_idr); + spin_lock_init(&adev->vm_manager.pasid_lock); +#endif } /** @@ -2708,8 +2782,13 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { +#ifdef HAVE_STRUCT_XARRAY WARN_ON(!xa_empty(&adev->vm_manager.pasids)); xa_destroy(&adev->vm_manager.pasids); +#else + WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); + idr_destroy(&adev->vm_manager.pasid_idr); +#endif amdgpu_vmid_mgr_fini(adev); } @@ -2760,6 +2839,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM + * @ts: Timestamp of the fault * @vmid: VMID, only used for GFX 9.4.3. * @node_id: Node_id received in IH cookie. Only applicable for * GFX 9.4.3. @@ -2770,7 +2850,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2780,15 +2860,24 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, struct amdgpu_vm *vm; int r; +#ifdef HAVE_STRUCT_XARRAY xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); vm = xa_load(&adev->vm_manager.pasids, pasid); +#else + spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); +#endif if (vm) { root = amdgpu_bo_ref(vm->root.bo); is_compute_context = vm->is_compute_context; } else { root = NULL; } +#ifdef HAVE_STRUCT_XARRAY xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); +#else + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags); +#endif if (!root) return false; @@ -2796,7 +2885,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } @@ -2806,11 +2895,20 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unref; /* Double check that the VM still exists */ +#ifdef HAVE_STRUCT_XARRAY xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); vm = xa_load(&adev->vm_manager.pasids, pasid); +#else + spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); +#endif if (vm && vm->root.bo != root) vm = NULL; +#ifdef HAVE_STRUCT_XARRAY xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); +#else + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags); +#endif if (!vm) goto error_unlock; @@ -2834,7 +2932,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = dma_resv_reserve_fences(root->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&root->tbo), 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); goto error_unlock; @@ -2964,6 +3062,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) * * Cache the fault info for later use by userspace in debugging. */ + void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr, @@ -2973,9 +3072,15 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, struct amdgpu_vm *vm; unsigned long flags; +#ifdef HAVE_STRUCT_XARRAY xa_lock_irqsave(&adev->vm_manager.pasids, flags); vm = xa_load(&adev->vm_manager.pasids, pasid); +#else + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + vm = idr_find(&adev->vm_manager.pasid_idr, pasid); +#endif + /* Don't update the fault cache if status is 0. 
In the multiple * fault case, subsequent faults will return a 0 status which is * useless for userspace and replaces the useful fault status, so @@ -3008,7 +3112,11 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, WARN_ONCE(1, "Invalid vmhub %u\n", vmhub); } } +#ifdef HAVE_STRUCT_XARRAY xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +#else + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); +#endif } /** @@ -3022,5 +3130,5 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, */ bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) { - return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; + return bo && amdkcl_ttm_resvp(&bo->tbo) == amdkcl_ttm_resvp(&vm->root.bo->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 046949c4b6959..e62471b04e955 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -304,8 +304,8 @@ struct amdgpu_vm_update_params { struct amdgpu_vm_update_funcs { int (*map_table)(struct amdgpu_bo_vm *bo); - int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode); + int (*prepare)(struct amdgpu_vm_update_params *p, + struct amdgpu_sync *sync); int (*update)(struct amdgpu_vm_update_params *p, struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); @@ -324,7 +324,11 @@ struct amdgpu_vm_fault_info { struct amdgpu_vm { /* tree of virtual addresses mapped */ +#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED + struct rb_root va; +#else struct rb_root_cached va; +#endif /* Lock to prevent eviction while we are updating page tables * use vm_eviction_lock/unlock(vm) @@ -456,7 +460,12 @@ struct amdgpu_vm_manager { /* PASID to VM mapping, will be used in interrupt context to * look up VM of a page fault */ +#ifdef HAVE_STRUCT_XARRAY struct xarray pasids; +#else + struct idr pasid_idr; + spinlock_t pasid_lock; +#endif /* Global registration of recent page fault information */ struct amdgpu_vm_fault_info fault_info; }; @@ -505,9 +514,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, bool allow_override, - struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, uint64_t vram_base, + bool immediate, bool unlocked, bool flush_tlb, + bool allow_override, struct amdgpu_sync *sync, + uint64_t start, uint64_t last, uint64_t flags, + uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, @@ -558,7 +568,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); @@ -650,6 +660,7 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); + void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 3895bd7d176a9..1e79d31544a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -39,20 +39,18 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) * amdgpu_vm_cpu_prepare - prepare page table update with the CPU * * @p: see amdgpu_vm_update_params definition - * @resv: reservation object with embedded fence - * @sync_mode: synchronization mode + * @sync: sync obj with fences to wait on * * Returns: * Negative errno, 0 for success. */ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, - struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode) + struct amdgpu_sync *sync) { - if (!resv) + if (!sync) return 0; - return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true); + return amdgpu_sync_wait(sync, true); } /** @@ -77,7 +75,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, uint64_t value; long r; - r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL, + r = dma_resv_wait_timeout(amdkcl_ttm_resvp(&vmbo->bo.tbo), DMA_RESV_USAGE_KERNEL, true, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index e39d6e7643bfb..43ecd84a61011 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; @@ -403,7 +395,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); + r = vm->update_funcs->prepare(&params, NULL); if (r) goto exit; @@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; unsigned int num_entries; - int r; memset(&bp, 0, sizeof(bp)); @@ -482,44 +471,9 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.xcp_id_plus1 = xcp_id + 1; if (vm->root.bo) - bp.resv = vm->root.bo->tbo.base.resv; - - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_pt_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.xcp_id_plus1 = xcp_id + 1; - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - amdgpu_bo_add_to_shadow_list(*vmbo); + bp.resv = amdkcl_ttm_resvp(&vm->root.bo->tbo); - return 0; + return amdgpu_bo_create_vm(adev, &bp, vmbo); } /** @@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, return 0; error_free_pt: - amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt_bo); return r; } @@ -581,17 +534,10 @@ static 
int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, */ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) { - struct amdgpu_bo *shadow; - if (!entry->bo) return; entry->bo->vm_bo = NULL; - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); spin_lock(&entry->vm->status_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 9b748d7058b5c..b1c44648da82b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -35,16 +35,7 @@ */ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) { - int r; - - r = amdgpu_ttm_alloc_gart(&table->bo.tbo); - if (r) - return r; - - if (table->shadow) - r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); - - return r; + return amdgpu_ttm_alloc_gart(&table->bo.tbo); } /* Allocate a new job for @count PTE updates */ @@ -77,32 +68,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, * amdgpu_vm_sdma_prepare - prepare SDMA command submission * * @p: see amdgpu_vm_update_params definition - * @resv: reservation object with embedded fence - * @sync_mode: synchronization mode + * @sync: amdgpu_sync object with fences to wait for * * Returns: * Negative errno, 0 for success. */ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, - struct dma_resv *resv, - enum amdgpu_sync_mode sync_mode) + struct amdgpu_sync *sync) { - struct amdgpu_sync sync; int r; r = amdgpu_vm_sdma_alloc_job(p, 0); if (r) return r; - if (!resv) + if (!sync) return 0; - amdgpu_sync_create(&sync); - r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm); - if (!r) - r = amdgpu_sync_push_to_job(&sync, p->job); - amdgpu_sync_free(&sync); - + r = amdgpu_sync_push_to_job(sync, p->job); if (r) { p->num_dw_left = 0; amdgpu_job_free(p->job); @@ -144,7 +127,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { - dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f, + dma_resv_add_fence(amdkcl_ttm_resvp(&p->vm->root.bo->tbo), f, DMA_RESV_USAGE_BOOKKEEP); } @@ -245,15 +228,17 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); + dma_resv_iter_begin(&cursor, amdkcl_ttm_resvp(&bo->tbo), DMA_RESV_USAGE_KERNEL); dma_resv_for_each_fence_unlocked(&cursor, fence) { dma_fence_get(fence); + #ifdef HAVE_STRUCT_XARRAY r = drm_sched_job_add_dependency(&p->job->base, fence); if (r) { dma_fence_put(fence); dma_resv_iter_end(&cursor); return r; } +#endif } dma_resv_iter_end(&cursor); @@ -273,17 +258,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (!p->pages_addr) { /* set page commands needed */ - if (vmbo->shadow) - amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr, - count, incr, flags); amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, incr, flags); return 0; } /* copy commands needed */ - ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * - (vmbo->shadow ? 
2 : 1); + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; /* for padding */ ndw -= 7; @@ -298,8 +279,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, pte[i] |= flags; } - if (vmbo->shadow) - amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes); amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); pe += nptes * 8; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c index 51cddfa3f1e8f..1a7b5cbc52a8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -71,7 +71,9 @@ static void amdgpu_tlb_fence_work(struct work_struct *work) } static const struct dma_fence_ops amdgpu_tlb_fence_ops = { +#ifdef HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO .use_64bit_seqno = true, +#endif .get_driver_name = amdgpu_tlb_fence_get_driver_name, .get_timeline_name = amdgpu_tlb_fence_get_timeline_name }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 5acd20ff59797..6d96e1f21e201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -295,9 +295,9 @@ int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe) return 0; } -static int vpe_early_init(void *handle) +static int vpe_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { @@ -356,9 +356,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe) return 0; } -static int vpe_sw_init(void *handle) +static int vpe_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; int ret; @@ -381,9 +381,9 @@ static int vpe_sw_init(void *handle) return ret; } -static int vpe_sw_fini(void *handle) +static int vpe_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; release_firmware(vpe->fw); @@ -398,9 +398,9 @@ static int vpe_sw_fini(void *handle) return 0; } -static int vpe_hw_init(void *handle) +static int vpe_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; int ret; @@ -421,9 +421,9 @@ static int vpe_hw_init(void *handle) return 0; } -static int vpe_hw_fini(void *handle) +static int vpe_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; vpe_ring_stop(vpe); @@ -434,20 +434,18 @@ static int vpe_hw_fini(void *handle) return 0; } -static int vpe_suspend(void *handle) +static int vpe_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vpe.idle_work); - return vpe_hw_fini(adev); + return vpe_hw_fini(ip_block); } -static int vpe_resume(void *handle) +static int vpe_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vpe_hw_init(adev); + return vpe_hw_init(ip_block); } static void vpe_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7d26a962f811c..e53d3a1c33c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -214,7 +214,11 @@ static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO, static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO, amdgpu_mem_info_vram_vendor, NULL); +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS static struct attribute *amdgpu_vram_mgr_attributes[] = { +#else +static const struct attribute *amdgpu_vram_mgr_attributes[] = { +#endif &dev_attr_mem_info_vram_total.attr, &dev_attr_mem_info_vis_vram_total.attr, &dev_attr_mem_info_vram_used.attr, @@ -223,6 +227,7 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { @@ -241,6 +246,7 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { .attrs = amdgpu_vram_mgr_attributes, .is_visible = amdgpu_vram_attrs_is_visible }; +#endif /** * amdgpu_vram_mgr_vis_size - Calculate visible block size @@ -715,12 +721,15 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE); dma_addr_t addr; - addr = dma_map_resource(dev, phys, size, dir, - DMA_ATTR_SKIP_CPU_SYNC); - r = dma_mapping_error(dev, addr); - if (r) - goto error_unmap; - + if (dev) { + addr = dma_map_resource(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + r = dma_mapping_error(dev, addr); + if (r) + goto error_unmap; + } else { + addr = phys; + } sg_set_page(sg, NULL, size, 0); sg_dma_address(sg) = addr; sg_dma_len(sg) = size; @@ -734,10 +743,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, for_each_sgtable_sg((*sgt), sg, i) { if (!sg->length) continue; - - dma_unmap_resource(dev, sg->dma_address, - sg->length, dir, - DMA_ATTR_SKIP_CPU_SYNC); + if (dev) + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); } sg_free_table(*sgt); @@ -762,10 +771,12 @@ void amdgpu_vram_mgr_free_sgt(struct device *dev, struct scatterlist *sg; int i; - for_each_sgtable_sg(sgt, sg, i) - dma_unmap_resource(dev, sg->dma_address, - sg->length, dir, - DMA_ATTR_SKIP_CPU_SYNC); + if (dev) { + for_each_sgtable_sg(sgt, sg, i) + dma_unmap_resource(dev, sg->dma_address, + sg->length, dir, + DMA_ATTR_SKIP_CPU_SYNC); + } sg_free_table(sgt); kfree(sgt); } @@ -907,6 +918,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; int err; +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + int ret; +#endif ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); @@ -915,6 +929,12 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); mgr->default_page_size = PAGE_SIZE; +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + /* Add the two VRAM-related sysfs files */ + ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + if (ret) + DRM_ERROR("Failed to register sysfs\n"); +#endif if (!adev->gmc.is_app_apu) { man->func = &amdgpu_vram_mgr_func; @@ -965,6 +985,9 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) drm_buddy_fini(&mgr->mm); mutex_unlock(&mgr->lock); +#ifndef HAVE_PCI_DRIVER_DEV_GROUPS + sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); +#endif ttm_resource_manager_cleanup(man); 
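The amdgpu_vram_mgr_alloc_sgt/free_sgt hunks above make the dma_map_resource() step conditional on a non-NULL struct device: when no importing device is given, the exporter hands back the raw bus address and later skips unmapping. A hedged sketch of just that rule in isolation (demo_map_chunk is an illustrative name, not a function from this patch):

#include <linux/dma-mapping.h>

/* Map one VRAM chunk for an importer; a NULL dev means the caller
 * wants untranslated bus addresses (e.g. peer access with no IOMMU
 * mapping), mirroring the fallback added above. */
static int demo_map_chunk(struct device *dev, phys_addr_t phys,
			  size_t size, enum dma_data_direction dir,
			  dma_addr_t *out)
{
	if (!dev) {
		*out = (dma_addr_t)phys;	/* identity mapping */
		return 0;
	}

	*out = dma_map_resource(dev, phys, size, dir,
				DMA_ATTR_SKIP_CPU_SYNC);
	return dma_mapping_error(dev, *out);
}

The teardown has to apply the same test, which is why amdgpu_vram_mgr_free_sgt above only calls dma_unmap_resource() when dev is non-NULL.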
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index b256cbc2bc270..7c27b38ebb193 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -25,7 +25,6 @@ #define __AMDGPU_VRAM_MGR_H__ #include <drm/drm_buddy.h> - struct amdgpu_vram_mgr { struct ttm_resource_manager manager; struct drm_buddy mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 90138bc5f03d1..648237f27d1ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -56,6 +56,27 @@ enum AMDGPU_XCP_STATE { AMDGPU_XCP_RESUME, }; +enum amdgpu_xcp_res_id { + AMDGPU_XCP_RES_XCC, + AMDGPU_XCP_RES_DMA, + AMDGPU_XCP_RES_DEC, + AMDGPU_XCP_RES_JPEG, + AMDGPU_XCP_RES_MAX, +}; + +struct amdgpu_xcp_res_details { + enum amdgpu_xcp_res_id id; + u8 num_inst; + u8 num_shared; +}; + +struct amdgpu_xcp_cfg { + u8 mode; + struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX]; + u8 num_res; + struct amdgpu_xcp_mgr *xcp_mgr; +}; + struct amdgpu_xcp_ip_funcs { int (*prepare_suspend)(void *handle, uint32_t inst_mask); int (*suspend)(void *handle, uint32_t inst_mask); @@ -97,6 +118,9 @@ struct amdgpu_xcp_mgr { /* Used to determine KFD memory size limits per XCP */ unsigned int num_xcp_per_mem_partition; + struct amdgpu_xcp_cfg *xcp_cfg; + uint32_t supp_xcp_modes; + uint32_t avail_xcp_modes; }; struct amdgpu_xcp_mgr_funcs { @@ -108,7 +132,9 @@ struct amdgpu_xcp_mgr_funcs { struct amdgpu_xcp_ip *ip); int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr, struct amdgpu_xcp *xcp, uint8_t *mem_id); - + int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, + struct amdgpu_xcp_cfg *xcp_cfg); int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); @@ -180,6 +206,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from) #define for_each_xcp(xcp_mgr, xcp, i) \ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \ - xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) + ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 7de449fae1e3a..8e697273d2ac4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -325,7 +325,9 @@ static struct attribute *amdgpu_xgmi_hive_attrs[] = { &amdgpu_xgmi_hive_id, NULL }; +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE ATTRIBUTE_GROUPS(amdgpu_xgmi_hive); +#endif static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj, struct attribute *attr, char *buf) @@ -358,7 +360,11 @@ static const struct sysfs_ops amdgpu_xgmi_hive_ops = { static const struct kobj_type amdgpu_xgmi_hive_type = { .release = amdgpu_xgmi_hive_release, .sysfs_ops = &amdgpu_xgmi_hive_ops, +#ifdef HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE .default_groups = amdgpu_xgmi_hive_groups, +#else + .default_attrs = amdgpu_xgmi_hive_attrs, +#endif }; static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, @@ -667,6 +673,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) task_barrier_init(&hive->tb); hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN; hive->hi_req_gpu = NULL; + atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE); /* * hive pstate on boot is high in vega20 so we have to go to low @@ 
-860,8 +867,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) if (!adev->gmc.xgmi.supported) return 0; - if (!adev->gmc.xgmi.pending_reset && - amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { ret = psp_xgmi_initialize(&adev->psp, false, true); if (ret) { dev_err(adev->dev, @@ -907,8 +913,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) task_barrier_add_task(&hive->tb); - if (!adev->gmc.xgmi.pending_reset && - amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { /* update node list for other device in the hive */ if (tmp_adev != adev) { @@ -985,7 +990,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) } } - if (!ret && !adev->gmc.xgmi.pending_reset) + if (!ret) ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); exit_unlock: @@ -1500,3 +1505,113 @@ int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev) return 0; } + +static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work) +{ + struct amdgpu_hive_info *hive = + container_of(work, struct amdgpu_hive_info, reset_on_init_work); + struct amdgpu_reset_context reset_context; + struct amdgpu_device *tmp_adev; + struct list_head device_list; + int r; + + mutex_lock(&hive->hive_lock); + + INIT_LIST_HEAD(&device_list); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, &device_list); + + tmp_adev = list_first_entry(&device_list, struct amdgpu_device, + reset_list); + amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); + + reset_context.method = AMD_RESET_METHOD_ON_INIT; + reset_context.reset_req_dev = tmp_adev; + reset_context.hive = hive; + reset_context.reset_device_list = &device_list; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); + + amdgpu_reset_do_xgmi_reset_on_init(&reset_context); + mutex_unlock(&hive->hive_lock); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); + + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + r = amdgpu_ras_init_badpage_info(tmp_adev); + if (r && r != -EHWPOISON) + dev_err(tmp_adev->dev, + "error during bad page data initialization"); + } +} + +static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive) +{ + INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work); + amdgpu_reset_domain_schedule(hive->reset_domain, + &hive->reset_on_init_work); +} + +int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive; + bool reset_scheduled; + int num_devs; + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + return -EINVAL; + + mutex_lock(&hive->hive_lock); + num_devs = atomic_read(&hive->number_devices); + reset_scheduled = false; + if (num_devs == adev->gmc.xgmi.num_physical_nodes) { + amdgpu_xgmi_schedule_reset_on_init(hive); + reset_scheduled = true; + } + + mutex_unlock(&hive->hive_lock); + amdgpu_put_xgmi_hive(hive); + + if (reset_scheduled) + flush_work(&hive->reset_on_init_work); + + return 0; +} + +int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, + int req_nps_mode) +{ + struct amdgpu_device *tmp_adev; + int cur_nps_mode, r; + + /* This is expected to be called only during unload of driver. The + * request needs to be placed only once for all devices in the hive. 
If + * one of them fails, revert the request for previous successful devices. + * After placing the request, make hive mode as UNKNOWN so that other + * devices don't request anymore. + */ + mutex_lock(&hive->hive_lock); + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + r = adev->gmc.gmc_funcs->request_mem_partition_mode( + tmp_adev, req_nps_mode); + if (r) + goto err; + } + /* Set to UNKNOWN so that other devices don't request anymore */ + atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE); + + mutex_unlock(&hive->hive_lock); + + return 0; +err: + /* Request back current mode if one of the requests failed */ + cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev); + list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, + gmc.xgmi.head) + adev->gmc.gmc_funcs->request_mem_partition_mode(tmp_adev, + cur_nps_mode); + mutex_unlock(&hive->hive_lock); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index a3bfc16de6d49..41d5f97fc77ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -45,6 +45,8 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; atomic_t ras_recovery; struct ras_event_manager event_mgr; + struct work_struct reset_on_init_work; + atomic_t requested_nps_mode; }; struct amdgpu_pcs_ras_field { @@ -75,5 +77,10 @@ static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); } int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); + +int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, + int req_nps_mode); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 6e9eeaeb3de1d..b4f9c2f4e92cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -28,17 +28,21 @@ #define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 #define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB #define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 - +#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048 +#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2 +#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64 /* * layout - * 0 64KB 65KB 66KB - * | VBIOS | PF2VF | VF2PF | Bad Page | ... - * | 64KB | 1KB | 1KB | + * 0 64KB 65KB 66KB 68KB 132KB + * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ... + * | 64KB | 1KB | 1KB | 2KB | 64KB | ... 
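+ * + * worked out from the defines below: the RAS telemetry region starts at the + * bad page offset (66KB) plus AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB (2KB) = 68KB + * and spans AMD_SRIOV_RAS_TELEMETRY_SIZE_KB (64KB), ending at 132KB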
*/ + #define AMD_SRIOV_MSG_SIZE_KB 1 #define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB #define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) #define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) +#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB) /* * PF2VF history log: @@ -86,30 +90,59 @@ enum amd_sriov_ucode_engine_id { union amd_sriov_msg_feature_flags { struct { - uint32_t error_log_collect : 1; - uint32_t host_load_ucodes : 1; - uint32_t host_flr_vramlost : 1; - uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; - uint32_t reg_indirect_acc : 1; - uint32_t av1_support : 1; - uint32_t vcn_rb_decouple : 1; - uint32_t mes_info_enable : 1; - uint32_t reserved : 23; + uint32_t error_log_collect : 1; + uint32_t host_load_ucodes : 1; + uint32_t host_flr_vramlost : 1; + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reg_indirect_acc : 1; + uint32_t av1_support : 1; + uint32_t vcn_rb_decouple : 1; + uint32_t mes_info_dump_enable : 1; + uint32_t ras_caps : 1; + uint32_t ras_telemetry : 1; + uint32_t reserved : 21; } flags; uint32_t all; }; union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; - uint32_t reserved : 29; + uint32_t vf_reg_access_ih : 1; + uint32_t vf_reg_access_mmhub : 1; + uint32_t vf_reg_access_gc : 1; + uint32_t reserved : 29; } flags; uint32_t all; }; +union amd_sriov_ras_caps { + struct { + uint64_t block_umc : 1; + uint64_t block_sdma : 1; + uint64_t block_gfx : 1; + uint64_t block_mmhub : 1; + uint64_t block_athub : 1; + uint64_t block_pcie_bif : 1; + uint64_t block_hdp : 1; + uint64_t block_xgmi_wafl : 1; + uint64_t block_df : 1; + uint64_t block_smn : 1; + uint64_t block_sem : 1; + uint64_t block_mp0 : 1; + uint64_t block_mp1 : 1; + uint64_t block_fuse : 1; + uint64_t block_mca : 1; + uint64_t block_vcn : 1; + uint64_t block_jpeg : 1; + uint64_t block_ih : 1; + uint64_t block_mpio : 1; + uint64_t poison_propogation_mode : 1; + uint64_t reserved : 44; + } bits; + uint64_t all; +}; + union amd_sriov_msg_os_info { struct { uint32_t windows : 1; @@ -158,7 +191,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49) +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -211,6 +244,12 @@ struct amd_sriov_msg_pf2vf_info { uint32_t pcie_atomic_ops_support_flags; /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */ uint32_t gpu_capacity; + /* vf bdf on host pci tree for debug only */ + uint32_t bdf_on_host; + uint32_t more_bp; //Reserved for future use. 
+ union amd_sriov_ras_caps ras_en_caps; + union amd_sriov_ras_caps ras_telemetry_en_caps; + /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; } __packed; @@ -283,8 +322,12 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_REL_GPU_FINI_ACCESS, MB_REQ_MSG_REQ_GPU_RESET_ACCESS, MB_REQ_MSG_REQ_GPU_INIT_DATA, + MB_REQ_MSG_PSP_VF_CMD_RELAY, MB_REQ_MSG_LOG_VF_ERROR = 200, + MB_REQ_MSG_READY_TO_RESET = 201, + MB_REQ_MSG_RAS_POISON = 202, + MB_REQ_RAS_ERROR_COUNT = 203, }; /* mailbox message send from host to guest */ @@ -297,10 +340,60 @@ enum amd_sriov_mailbox_response_message { MB_RES_MSG_FAIL, MB_RES_MSG_QUERY_ALIVE, MB_RES_MSG_GPU_INIT_DATA_READY, + MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, MB_RES_MSG_TEXT_MESSAGE = 255 }; +enum amd_sriov_ras_telemetry_gpu_block { + RAS_TELEMETRY_GPU_BLOCK_UMC = 0, + RAS_TELEMETRY_GPU_BLOCK_SDMA = 1, + RAS_TELEMETRY_GPU_BLOCK_GFX = 2, + RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3, + RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4, + RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5, + RAS_TELEMETRY_GPU_BLOCK_HDP = 6, + RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7, + RAS_TELEMETRY_GPU_BLOCK_DF = 8, + RAS_TELEMETRY_GPU_BLOCK_SMN = 9, + RAS_TELEMETRY_GPU_BLOCK_SEM = 10, + RAS_TELEMETRY_GPU_BLOCK_MP0 = 11, + RAS_TELEMETRY_GPU_BLOCK_MP1 = 12, + RAS_TELEMETRY_GPU_BLOCK_FUSE = 13, + RAS_TELEMETRY_GPU_BLOCK_MCA = 14, + RAS_TELEMETRY_GPU_BLOCK_VCN = 15, + RAS_TELEMETRY_GPU_BLOCK_JPEG = 16, + RAS_TELEMETRY_GPU_BLOCK_IH = 17, + RAS_TELEMETRY_GPU_BLOCK_MPIO = 18, + RAS_TELEMETRY_GPU_BLOCK_COUNT = 19, +}; + +struct amd_sriov_ras_telemetry_header { + uint32_t checksum; + uint32_t used_size; + uint32_t reserved[2]; +}; + +struct amd_sriov_ras_telemetry_error_count { + struct { + uint32_t ce_count; + uint32_t ue_count; + uint32_t de_count; + uint32_t ce_overflow_count; + uint32_t ue_overflow_count; + uint32_t de_overflow_count; + uint32_t reserved[6]; + } block[RAS_TELEMETRY_GPU_BLOCK_COUNT]; +}; + +struct amdsriov_ras_telemetry { + struct amd_sriov_ras_telemetry_header header; + + union { + struct amd_sriov_ras_telemetry_error_count error_count; + } body; +}; + /* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */ enum amd_sriov_gpu_init_data_version { GPU_INIT_DATA_READY_V1 = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 228fd4dd32f13..81f4354cbd6ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -75,6 +75,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_mask; ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return; @@ -92,8 +94,6 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (aqua_vanjaram_xcp_vcn_shared(adev)) - inst_mask = 1 << (inst_idx * 2); break; default: DRM_ERROR("Not support ring type %d!", ring->funcs->type); @@ -103,6 +103,10 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, + ring->xcp_id); + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; } } @@ -390,38 
+394,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x struct amdgpu_xcp_ip *ip) { struct amdgpu_device *adev = xcp_mgr->adev; + int num_sdma, num_vcn, num_shared_vcn, num_xcp; int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; - int num_sdma, num_vcn; num_sdma = adev->sdma.num_instances; num_vcn = adev->vcn.num_vcn_inst; + num_shared_vcn = 1; + + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp; switch (xcp_mgr->mode) { case AMDGPU_SPX_PARTITION_MODE: - num_sdma_xcp = num_sdma; - num_vcn_xcp = num_vcn; - break; case AMDGPU_DPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 2; - num_vcn_xcp = num_vcn / 2; - break; case AMDGPU_TPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 3; - num_vcn_xcp = num_vcn / 3; - break; case AMDGPU_QPX_PARTITION_MODE: - num_sdma_xcp = num_sdma / 4; - num_vcn_xcp = num_vcn / 4; - break; case AMDGPU_CPX_PARTITION_MODE: - num_sdma_xcp = 2; - num_vcn_xcp = num_vcn ? 1 : 0; + num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp); + num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp); break; default: return -EINVAL; } - num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + if (num_vcn && num_xcp > num_vcn) + num_shared_vcn = num_xcp / num_vcn; switch (ip_id) { case AMDGPU_XCP_GFXHUB: @@ -437,7 +434,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x ip->ip_funcs = &sdma_v4_4_2_xcp_funcs; break; case AMDGPU_XCP_VCN: - ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id); + ip->inst_mask = + XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn); /* TODO : Assign IP funcs */ break; default: @@ -449,6 +447,61 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x return 0; } +static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, + struct amdgpu_xcp_cfg *xcp_cfg) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int max_res[AMDGPU_XCP_RES_MAX] = {}; + bool res_lt_xcp; + int num_xcp, i; + + if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) + return -EINVAL; + + max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask); + max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances; + max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst; + max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst; + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_xcp = 1; + break; + case AMDGPU_DPX_PARTITION_MODE: + num_xcp = 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + num_xcp = 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + num_xcp = 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + num_xcp = NUM_XCC(adev->gfx.xcc_mask); + break; + default: + return -EINVAL; + } + + xcp_cfg->num_res = ARRAY_SIZE(max_res); + + for (i = 0; i < xcp_cfg->num_res; i++) { + res_lt_xcp = max_res[i] < num_xcp; + xcp_cfg->xcp_res[i].id = i; + xcp_cfg->xcp_res[i].num_inst = + res_lt_xcp ? 1 : max_res[i] / num_xcp; + xcp_cfg->xcp_res[i].num_inst = + i == AMDGPU_XCP_RES_JPEG ? + xcp_cfg->xcp_res[i].num_inst * + adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst; + xcp_cfg->xcp_res[i].num_shared = + res_lt_xcp ? 
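+			/* A resource with fewer instances than partitions is
+			 * shared between partitions: e.g. a single VCN under
+			 * TPX (num_xcp = 3) yields num_inst = 1 and
+			 * num_shared = 3 / 1 = 3 partitions per instance. */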
num_xcp / max_res[i] : 1; + } + + return 0; +} + static enum amdgpu_gfx_partition __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) { @@ -484,7 +537,7 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, case AMDGPU_SPX_PARTITION_MODE: return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0; + return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: return (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 3) && @@ -532,6 +585,57 @@ static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, return ret; } +static void +__aqua_vanjaram_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + + xcp_mgr->supp_xcp_modes = 0; + + switch (NUM_XCC(adev->gfx.xcc_mask)) { + case 8: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_QPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 6: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_TPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 4: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + /* this seems only existing in emulation phase */ + case 2: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 1: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + + default: + break; + } +} + +static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + int mode; + + xcp_mgr->avail_xcp_modes = 0; + + for_each_inst(mode, xcp_mgr->supp_xcp_modes) { + if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) + xcp_mgr->avail_xcp_modes |= BIT(mode); + } +} + static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode, int *num_xcps) { @@ -580,6 +684,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, amdgpu_xcp_init(xcp_mgr, *num_xcps, mode); ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags); + if (!ret) + __aqua_vanjaram_update_available_partition_mode(xcp_mgr); unlock: if (flags & AMDGPU_XCP_OPS_KFD) amdgpu_amdkfd_unlock_kfd(adev); @@ -587,6 +693,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, return ret; } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev, int xcc_id, uint8_t *mem_id) { @@ -643,6 +750,7 @@ static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr, return r; } +#endif static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, enum AMDGPU_XCP_IP_BLOCK ip_id, @@ -658,9 +766,13 @@ struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = { .switch_partition_mode = &aqua_vanjaram_switch_partition_mode, .query_partition_mode = &aqua_vanjaram_query_partition_mode, .get_ip_details = &aqua_vanjaram_get_xcp_ip_details, + .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info, +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id, +#endif .select_scheds = &aqua_vanjaram_select_scheds, - .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list + .update_partition_sched_list = + 
&aqua_vanjaram_update_partition_sched_list }; static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) @@ -675,6 +787,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) if (ret) return ret; + __aqua_vanjaram_update_supported_modes(adev->xcp_mgr); /* TODO: Default memory node affinity init */ return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 622634c08c7b5..abb18df7b0797 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -189,7 +189,10 @@ void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector) { amdgpu_connector->ddc_bus->rec.hpd = amdgpu_connector->hpd.hpd; amdgpu_connector->ddc_bus->aux.transfer = amdgpu_atombios_dp_aux_transfer; + +#ifdef HAVE_DRM_DP_AUX_DRM_DEV amdgpu_connector->ddc_bus->aux.drm_dev = amdgpu_connector->base.dev; +#endif drm_dp_aux_init(&amdgpu_connector->ddc_bus->aux); amdgpu_connector->ddc_bus->has_aux = true; @@ -613,7 +616,11 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i dp_info->tries = 0; voltage = 0xff; while (1) { +#ifdef HAVE_DRM_DP_LINK_TRAIN_CLOCK_RECOVERY_DELAY_2ARGS drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd); +#else + drm_dp_link_train_clock_recovery_delay(dp_info->dpcd); +#endif if (drm_dp_dpcd_read_link_status(dp_info->aux, dp_info->link_status) <= 0) { @@ -678,7 +685,11 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i dp_info->tries = 0; channel_eq = false; while (1) { +#ifdef HAVE_DRM_DP_LINK_TRAIN_CHANNEL_EQ_DELAY_2ARGS drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd); +#else + drm_dp_link_train_channel_eq_delay(dp_info->dpcd); +#endif if (drm_dp_dpcd_read_link_status(dp_info->aux, dp_info->link_status) <= 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 8defca3705d51..c19af05dab725 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -186,10 +186,12 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU)) goto register_acpi_backlight; +#ifdef HAVE_ACPI_VIDEO_BACKLIGHT_USE_NATIVE if (!acpi_video_backlight_use_native()) { drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n"); goto register_acpi_backlight; } +#endif pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL); if (!pdata) { @@ -228,7 +230,11 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode register_acpi_backlight: /* Try registering an ACPI video backlight device instead. 
*/ +#ifdef HAVE_ACPI_VIDEO_REGISTER_BACKLIGHT acpi_video_register_backlight(); +#else + return; +#endif } void @@ -466,7 +472,11 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_connector->use_digital && (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)) return ATOM_ENCODER_MODE_HDMI; +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) else if (connector->display_info.is_hdmi && +#else + else if (drm_detect_hdmi_monitor(amdgpu_connector->edid) && +#endif (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else if (amdgpu_connector->use_digital) @@ -485,7 +495,11 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) else if (connector->display_info.is_hdmi && +#else + else if (drm_detect_hdmi_monitor(amdgpu_connector->edid) && +#endif (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else @@ -503,7 +517,11 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) } else if (amdgpu_audio != 0) { if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE) return ATOM_ENCODER_MODE_HDMI; +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) else if (connector->display_info.is_hdmi && +#else + else if (drm_detect_hdmi_monitor(amdgpu_connector->edid) && +#endif (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO)) return ATOM_ENCODER_MODE_HDMI; else diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index cf1d5d462b676..6c18ab35cf698 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1985,9 +1985,9 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .query_video_codecs = &cik_query_video_codecs, }; -static int cik_common_early_init(void *handle) +static int cik_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->smc_rreg = &cik_smc_rreg; adev->smc_wreg = &cik_smc_wreg; @@ -2124,19 +2124,19 @@ static int cik_common_early_init(void *handle) return 0; } -static int cik_common_sw_init(void *handle) +static int cik_common_sw_init(struct amdgpu_ip_block *ip_block) { return 0; } -static int cik_common_sw_fini(void *handle) +static int cik_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int cik_common_hw_init(void *handle) +static int cik_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* move the golden regs per IP block */ cik_init_golden_registers(adev); @@ -2148,23 +2148,19 @@ static int cik_common_hw_init(void *handle) return 0; } -static int cik_common_hw_fini(void *handle) +static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int cik_common_suspend(void *handle) +static int cik_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cik_common_hw_fini(adev); + return cik_common_hw_fini(ip_block); } -static int cik_common_resume(void *handle) +static int cik_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cik_common_hw_init(adev); + return cik_common_hw_init(ip_block); } static bool cik_common_is_idle(void *handle) @@ -2172,12 +2168,12 @@ static bool 
cik_common_is_idle(void *handle) return true; } -static int cik_common_wait_for_idle(void *handle) +static int cik_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int cik_common_soft_reset(void *handle) +static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block) { /* XXX hard reset?? */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 576baa9dbb0e1..9e9a58fd86cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -283,9 +283,9 @@ static void cik_ih_set_rptr(struct amdgpu_device *adev, WREG32(mmIH_RB_RPTR, ih->rptr); } -static int cik_ih_early_init(void *handle) +static int cik_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = amdgpu_irq_add_domain(adev); @@ -297,10 +297,10 @@ static int cik_ih_early_init(void *handle) return 0; } -static int cik_ih_sw_init(void *handle) +static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) @@ -311,9 +311,9 @@ static int cik_ih_sw_init(void *handle) return r; } -static int cik_ih_sw_fini(void *handle) +static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); @@ -321,34 +321,28 @@ static int cik_ih_sw_fini(void *handle) return 0; } -static int cik_ih_hw_init(void *handle) +static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return cik_ih_irq_init(adev); } -static int cik_ih_hw_fini(void *handle) +static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - cik_ih_irq_disable(adev); + cik_ih_irq_disable(ip_block->adev); return 0; } -static int cik_ih_suspend(void *handle) +static int cik_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cik_ih_hw_fini(adev); + return cik_ih_hw_fini(ip_block); } -static int cik_ih_resume(void *handle) +static int cik_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cik_ih_hw_init(adev); + return cik_ih_hw_init(ip_block); } static bool cik_ih_is_idle(void *handle) @@ -362,11 +356,11 @@ static bool cik_ih_is_idle(void *handle) return true; } -static int cik_ih_wait_for_idle(void *handle) +static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -378,9 +372,9 @@ static int cik_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int cik_ih_soft_reset(void *handle) +static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 952737de94111..df3f429e003e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -54,7 +54,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev); static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev); static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev); static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); -static int cik_sdma_soft_reset(void *handle); +static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block); MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); @@ -918,9 +918,9 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev, } } -static int cik_sdma_early_init(void *handle) +static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; adev->sdma.num_instances = SDMA_MAX_INSTANCE; @@ -937,10 +937,10 @@ static int cik_sdma_early_init(void *handle) return 0; } -static int cik_sdma_sw_init(void *handle) +static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r, i; /* SDMA trap event */ @@ -977,9 +977,9 @@ static int cik_sdma_sw_init(void *handle) return r; } -static int cik_sdma_sw_fini(void *handle) +static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -989,10 +989,10 @@ static int cik_sdma_sw_fini(void *handle) return 0; } -static int cik_sdma_hw_init(void *handle) +static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = cik_sdma_start(adev); if (r) @@ -1001,9 +1001,9 @@ static int cik_sdma_hw_init(void *handle) return r; } -static int cik_sdma_hw_fini(void *handle) +static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cik_ctx_switch_enable(adev, false); cik_sdma_enable(adev, false); @@ -1011,20 +1011,16 @@ static int cik_sdma_hw_fini(void *handle) return 0; } -static int cik_sdma_suspend(void *handle) +static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cik_sdma_hw_fini(adev); + return cik_sdma_hw_fini(ip_block); } -static int cik_sdma_resume(void *handle) +static int cik_sdma_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - cik_sdma_soft_reset(handle); + cik_sdma_soft_reset(ip_block); - return cik_sdma_hw_init(adev); + return cik_sdma_hw_init(ip_block); } static bool cik_sdma_is_idle(void *handle) @@ -1039,11 +1035,11 @@ static bool cik_sdma_is_idle(void *handle) return true; } -static int cik_sdma_wait_for_idle(void *handle) +static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -1056,10 +1052,10 @@ static 
int cik_sdma_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int cik_sdma_soft_reset(void *handle) +static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp; /* sdma0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 55982c0064b56..06088d52d81c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -364,6 +364,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 0726437873845..cadd69a243af1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -274,9 +274,9 @@ static void cz_ih_set_rptr(struct amdgpu_device *adev, WREG32(mmIH_RB_RPTR, ih->rptr); } -static int cz_ih_early_init(void *handle) +static int cz_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = amdgpu_irq_add_domain(adev); @@ -288,10 +288,10 @@ static int cz_ih_early_init(void *handle) return 0; } -static int cz_ih_sw_init(void *handle) +static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) @@ -302,9 +302,9 @@ static int cz_ih_sw_init(void *handle) return r; } -static int cz_ih_sw_fini(void *handle) +static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); @@ -312,10 +312,10 @@ static int cz_ih_sw_fini(void *handle) return 0; } -static int cz_ih_hw_init(void *handle) +static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = cz_ih_irq_init(adev); if (r) @@ -324,27 +324,21 @@ static int cz_ih_hw_init(void *handle) return 0; } -static int cz_ih_hw_fini(void *handle) +static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - cz_ih_irq_disable(adev); + cz_ih_irq_disable(ip_block->adev); return 0; } -static int cz_ih_suspend(void *handle) +static int cz_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cz_ih_hw_fini(adev); + return cz_ih_hw_fini(ip_block); } -static int cz_ih_resume(void *handle) +static int cz_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return cz_ih_hw_init(adev); + return cz_ih_hw_init(ip_block); } static bool cz_ih_is_idle(void *handle) @@ -358,11 +352,11 @@ static bool cz_ih_is_idle(void *handle) return true; } -static int cz_ih_wait_for_idle(void *handle) +static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -374,10 +368,10 @@ 
static int cz_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int cz_ih_soft_reset(void *handle) +static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 742adbc460c9d..9cd254a026030 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1706,7 +1706,13 @@ static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder, dce_v10_0_audio_write_sad_regs(encoder); dce_v10_0_audio_write_latency_fields(encoder, mode); +#if defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P) err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); +#elif defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B) + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); +#else + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); +#endif /* HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P */ if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -1874,13 +1880,14 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = target_fb->obj[0]; + obj = drm_gem_fb_get_obj(target_fb, 0); abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2074,7 +2081,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2401,6 +2408,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { @@ -2487,10 +2495,12 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = { .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v10_0_crtc_destroy, .page_flip_target = amdgpu_display_crtc_page_flip_target, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#endif }; static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2559,7 +2569,7 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc) int r; struct amdgpu_bo *abo; - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(crtc->primary->fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2682,7 +2692,9 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .prepare = dce_v10_0_crtc_prepare, .commit = dce_v10_0_crtc_commit, .disable = dce_v10_0_crtc_disable, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_scanout_position = 
amdgpu_crtc_get_scanout_position, +#endif }; static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) @@ -2736,9 +2748,9 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) return 0; } -static int dce_v10_0_early_init(void *handle) +static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg; adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg; @@ -2763,10 +2775,10 @@ static int dce_v10_0_early_init(void *handle) return 0; } -static int dce_v10_0_sw_init(void *handle) +static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); @@ -2795,7 +2807,9 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; +#endif r = amdgpu_display_modeset_create_props(adev); if (r) @@ -2842,9 +2856,9 @@ static int dce_v10_0_sw_init(void *handle) return 0; } -static int dce_v10_0_sw_fini(void *handle) +static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; drm_edid_free(adev->mode_info.bios_hardcoded_edid); @@ -2860,10 +2874,10 @@ static int dce_v10_0_sw_fini(void *handle) return 0; } -static int dce_v10_0_hw_init(void *handle) +static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v10_0_init_golden_registers(adev); @@ -2885,10 +2899,10 @@ static int dce_v10_0_hw_init(void *handle) return 0; } -static int dce_v10_0_hw_fini(void *handle) +static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v10_0_hpd_fini(adev); @@ -2903,9 +2917,9 @@ static int dce_v10_0_hw_fini(void *handle) return 0; } -static int dce_v10_0_suspend(void *handle) +static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_display_suspend_helper(adev); @@ -2915,18 +2929,18 @@ static int dce_v10_0_suspend(void *handle) adev->mode_info.bl_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - return dce_v10_0_hw_fini(handle); + return dce_v10_0_hw_fini(ip_block); } -static int dce_v10_0_resume(void *handle) +static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, adev->mode_info.bl_level); - ret = dce_v10_0_hw_init(handle); + ret = dce_v10_0_hw_init(ip_block); /* turn on the BL */ if (adev->mode_info.bl_encoder) { @@ -2946,22 +2960,22 @@ static bool dce_v10_0_is_idle(void *handle) return true; } -static int dce_v10_0_wait_for_idle(void *handle) +static int 
dce_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static bool dce_v10_0_check_soft_reset(void *handle) +static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return dce_v10_0_is_display_hung(adev); } -static int dce_v10_0_soft_reset(void *handle) +static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0, tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (dce_v10_0_is_display_hung(adev)) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 8d46ebadfa466..00ba63df1111a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1755,7 +1755,13 @@ static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder, dce_v11_0_audio_write_sad_regs(encoder); dce_v11_0_audio_write_latency_fields(encoder, mode); +#if defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P) err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); +#elif defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B) + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); +#else + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); +#endif /* HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P */ if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -1924,13 +1930,14 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = target_fb->obj[0]; + obj = drm_gem_fb_get_obj(target_fb, 0); abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2124,7 +2131,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2485,6 +2492,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { @@ -2571,10 +2579,12 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v11_0_crtc_destroy, .page_flip_target = amdgpu_display_crtc_page_flip_target, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#endif }; static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2643,7 +2653,7 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) int r; struct amdgpu_bo *abo; - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(crtc->primary->fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) 
DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2795,7 +2805,9 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .prepare = dce_v11_0_crtc_prepare, .commit = dce_v11_0_crtc_commit, .disable = dce_v11_0_crtc_disable, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_scanout_position = amdgpu_crtc_get_scanout_position, +#endif }; static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) @@ -2849,9 +2861,9 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) return 0; } -static int dce_v11_0_early_init(void *handle) +static int dce_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg; adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg; @@ -2889,10 +2901,10 @@ static int dce_v11_0_early_init(void *handle) return 0; } -static int dce_v11_0_sw_init(void *handle) +static int dce_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); @@ -2921,7 +2933,9 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; +#endif r = amdgpu_display_modeset_create_props(adev); if (r) @@ -2969,9 +2983,9 @@ static int dce_v11_0_sw_init(void *handle) return 0; } -static int dce_v11_0_sw_fini(void *handle) +static int dce_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; drm_edid_free(adev->mode_info.bios_hardcoded_edid); @@ -2987,10 +3001,10 @@ static int dce_v11_0_sw_fini(void *handle) return 0; } -static int dce_v11_0_hw_init(void *handle) +static int dce_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v11_0_init_golden_registers(adev); @@ -3023,10 +3037,10 @@ static int dce_v11_0_hw_init(void *handle) return 0; } -static int dce_v11_0_hw_fini(void *handle) +static int dce_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v11_0_hpd_fini(adev); @@ -3041,9 +3055,9 @@ static int dce_v11_0_hw_fini(void *handle) return 0; } -static int dce_v11_0_suspend(void *handle) +static int dce_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_display_suspend_helper(adev); @@ -3053,18 +3067,18 @@ static int dce_v11_0_suspend(void *handle) adev->mode_info.bl_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - return dce_v11_0_hw_fini(handle); + return dce_v11_0_hw_fini(ip_block); } -static int dce_v11_0_resume(void *handle) +static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; 
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, adev->mode_info.bl_level); - ret = dce_v11_0_hw_init(handle); + ret = dce_v11_0_hw_init(ip_block); /* turn on the BL */ if (adev->mode_info.bl_encoder) { @@ -3084,15 +3098,15 @@ static bool dce_v11_0_is_idle(void *handle) return true; } -static int dce_v11_0_wait_for_idle(void *handle) +static int dce_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int dce_v11_0_soft_reset(void *handle) +static int dce_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0, tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (dce_v11_0_is_display_hung(adev)) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index f08dc6a3886f1..9a29af5cc879d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1476,7 +1476,13 @@ static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, ssize_t err; u32 tmp; +#if defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P) err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); +#elif defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B) + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); +#else + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); +#endif /* HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P */ if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -1854,13 +1860,14 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = target_fb->obj[0]; + obj = drm_gem_fb_get_obj(target_fb, 0); abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2032,7 +2039,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2321,6 +2328,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { @@ -2406,10 +2414,12 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = { .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v6_0_crtc_destroy, .page_flip_target = amdgpu_display_crtc_page_flip_target, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#endif }; static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2474,7 +2484,7 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) int r; struct amdgpu_bo *abo; - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(crtc->primary->fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) 
DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2597,7 +2607,9 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .prepare = dce_v6_0_crtc_prepare, .commit = dce_v6_0_crtc_commit, .disable = dce_v6_0_crtc_disable, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_scanout_position = amdgpu_crtc_get_scanout_position, +#endif }; static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) @@ -2631,9 +2643,9 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) return 0; } -static int dce_v6_0_early_init(void *handle) +static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg; adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg; @@ -2662,11 +2674,11 @@ static int dce_v6_0_early_init(void *handle) return 0; } -static int dce_v6_0_sw_init(void *handle) +static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; bool ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); @@ -2693,7 +2705,9 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; +#endif r = amdgpu_display_modeset_create_props(adev); if (r) @@ -2741,9 +2755,9 @@ static int dce_v6_0_sw_init(void *handle) return r; } -static int dce_v6_0_sw_fini(void *handle) +static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; drm_edid_free(adev->mode_info.bios_hardcoded_edid); @@ -2758,10 +2772,10 @@ static int dce_v6_0_sw_fini(void *handle) return 0; } -static int dce_v6_0_hw_init(void *handle) +static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* disable vga render */ dce_v6_0_set_vga_render_state(adev, false); @@ -2781,10 +2795,10 @@ static int dce_v6_0_hw_init(void *handle) return 0; } -static int dce_v6_0_hw_fini(void *handle) +static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v6_0_hpd_fini(adev); @@ -2799,9 +2813,9 @@ static int dce_v6_0_hw_fini(void *handle) return 0; } -static int dce_v6_0_suspend(void *handle) +static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_display_suspend_helper(adev); @@ -2810,18 +2824,18 @@ static int dce_v6_0_suspend(void *handle) adev->mode_info.bl_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - return dce_v6_0_hw_fini(handle); + return dce_v6_0_hw_fini(ip_block); } -static int dce_v6_0_resume(void *handle) +static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; 
int ret; amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, adev->mode_info.bl_level); - ret = dce_v6_0_hw_init(handle); + ret = dce_v6_0_hw_init(ip_block); /* turn on the BL */ if (adev->mode_info.bl_encoder) { @@ -2841,12 +2855,12 @@ static bool dce_v6_0_is_idle(void *handle) return true; } -static int dce_v6_0_wait_for_idle(void *handle) +static int dce_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int dce_v6_0_soft_reset(void *handle) +static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a6a3adf2ae134..e3b05911913a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -191,7 +191,6 @@ static void dce_v8_0_page_flip(struct amdgpu_device *adev, /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); - /* update pitch */ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout addresses */ @@ -1658,7 +1657,13 @@ static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder, dce_v8_0_audio_write_sad_regs(encoder); dce_v8_0_audio_write_latency_fields(encoder, mode); +#if defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P) err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); +#elif defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B) + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); +#else + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); +#endif /* HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P */ if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -1821,13 +1826,14 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = target_fb->obj[0]; + obj = drm_gem_fb_get_obj(target_fb, 0); abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) return r; if (!atomic) { + abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); if (unlikely(r != 0)) { amdgpu_bo_unreserve(abo); @@ -2001,7 +2007,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2320,6 +2326,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return ret; } + aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_bo_unreserve(aobj); if (ret) { @@ -2406,10 +2413,12 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = { .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v8_0_crtc_destroy, .page_flip_target = amdgpu_display_crtc_page_flip_target, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#endif }; static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int 
mode) @@ -2478,7 +2487,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) int r; struct amdgpu_bo *abo; - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); + abo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(crtc->primary->fb, 0)); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2608,7 +2617,9 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .prepare = dce_v8_0_crtc_prepare, .commit = dce_v8_0_crtc_commit, .disable = dce_v8_0_crtc_disable, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_scanout_position = amdgpu_crtc_get_scanout_position, +#endif }; static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) @@ -2642,9 +2653,9 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) return 0; } -static int dce_v8_0_early_init(void *handle) +static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg; adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg; @@ -2678,10 +2689,10 @@ static int dce_v8_0_early_init(void *handle) return 0; } -static int dce_v8_0_sw_init(void *handle) +static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->mode_info.num_crtc; i++) { r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); @@ -2714,7 +2725,9 @@ static int dce_v8_0_sw_init(void *handle) else adev_to_drm(adev)->mode_config.prefer_shadow = 1; +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; +#endif r = amdgpu_display_modeset_create_props(adev); if (r) @@ -2762,9 +2775,9 @@ static int dce_v8_0_sw_init(void *handle) return 0; } -static int dce_v8_0_sw_fini(void *handle) +static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; drm_edid_free(adev->mode_info.bios_hardcoded_edid); @@ -2780,10 +2793,10 @@ static int dce_v8_0_sw_fini(void *handle) return 0; } -static int dce_v8_0_hw_init(void *handle) +static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* disable vga render */ dce_v8_0_set_vga_render_state(adev, false); @@ -2803,10 +2816,10 @@ static int dce_v8_0_hw_init(void *handle) return 0; } -static int dce_v8_0_hw_fini(void *handle) +static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; dce_v8_0_hpd_fini(adev); @@ -2821,9 +2834,9 @@ static int dce_v8_0_hw_fini(void *handle) return 0; } -static int dce_v8_0_suspend(void *handle) +static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_display_suspend_helper(adev); @@ -2833,18 +2846,18 @@ static int dce_v8_0_suspend(void *handle) adev->mode_info.bl_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - return dce_v8_0_hw_fini(handle); + return dce_v8_0_hw_fini(ip_block); } -static int dce_v8_0_resume(void 
*handle) +static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, adev->mode_info.bl_level); - ret = dce_v8_0_hw_init(handle); + ret = dce_v8_0_hw_init(ip_block); /* turn on the BL */ if (adev->mode_info.bl_encoder) { @@ -2864,15 +2877,15 @@ static bool dce_v8_0_is_idle(void *handle) return true; } -static int dce_v8_0_wait_for_idle(void *handle) +static int dce_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int dce_v8_0_soft_reset(void *handle) +static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0, tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (dce_v8_0_is_display_hung(adev)) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 75a6ca6459642..f0fb4717b40a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; @@ -4682,11 +4683,11 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) } } -static int gfx_v10_0_sw_init(void *handle) +static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) { int i, j, k, r, ring_id = 0; int xcc_id = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): @@ -4733,6 +4734,13 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; + /* SPM */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC, + GFX_10_1__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT, + &adev->gfx.spm_irq); + if (r) + return r; + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_EOP_INTERRUPT, @@ -4865,10 +4873,10 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev) (void **)&adev->gfx.me.me_fw_ptr); } -static int gfx_v10_0_sw_fini(void *handle) +static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -6691,13 +6699,13 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 
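+		/* First-time bring-up only: zero the MQD and program it from
+		 * scratch. On a queue reset (reset == true) or during
+		 * suspend/GPU-reset recovery, the previously saved MQD
+		 * contents are restored instead of being regenerated. */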
memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -6749,7 +6757,7 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v10_0_gfx_init_queue(ring); + r = gfx_v10_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -7029,13 +7037,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v10_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -7097,7 +7105,7 @@ static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v10_0_kcq_init_queue(ring); + r = gfx_v10_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -7365,10 +7373,10 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data); } -static int gfx_v10_0_hw_init(void *handle) +static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!amdgpu_emu_mode) gfx_v10_0_init_golden_registers(adev); @@ -7417,20 +7425,21 @@ static int gfx_v10_0_hw_init(void *handle) return r; } -static int gfx_v10_0_hw_fini(void *handle) +static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0); /* WA added for Vangogh asic fixing the SMU suspend failure * It needs to set power gating again during gfxoff control * otherwise the gfxoff disallowing will be failed to set. 
*/ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) - gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE); + gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -7455,14 +7464,14 @@ static int gfx_v10_0_hw_fini(void *handle) return 0; } -static int gfx_v10_0_suspend(void *handle) +static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v10_0_hw_fini(handle); + return gfx_v10_0_hw_fini(ip_block); } -static int gfx_v10_0_resume(void *handle) +static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v10_0_hw_init(handle); + return gfx_v10_0_hw_init(ip_block); } static bool gfx_v10_0_is_idle(void *handle) @@ -7476,11 +7485,11 @@ static bool gfx_v10_0_is_idle(void *handle) return true; } -static int gfx_v10_0_wait_for_idle(void *handle) +static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -7494,11 +7503,11 @@ static int gfx_v10_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v10_0_soft_reset(void *handle) +static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* GRBM_STATUS */ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); @@ -7677,9 +7686,97 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } -static int gfx_v10_0_early_init(void *handle) +static void gfx_v10_0_spm_start(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 0); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + + + data = RREG32_SOC15(GC, 0, mmRLC_SPM_PERFMON_CNTL); + data |= RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK; + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_CNTL), data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE, + CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE, + STRM_PERFMON_STATE_START_COUNTING); + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 1); +} + +static void gfx_v10_0_spm_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, PERFMON_STATE, + CP_PERFMON_STATE_STOP_COUNTING); + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE, + CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); +} + +static void gfx_v10_0_spm_set_rdptr(struct amdgpu_device *adev, u32 
rptr) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), rptr); +} + +static void gfx_v10_0_set_spm_perfmon_ring_buf(struct amdgpu_device *adev, + u64 gpu_addr, u32 size) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, + mmRLC_SPM_PERFMON_RING_BASE_LO), lower_32_bits(gpu_addr)); + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, + 0, mmRLC_SPM_PERFMON_RING_BASE_HI), upper_32_bits(gpu_addr)); + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE), size); + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_SEGMENT_THRESHOLD), 0xff); + + gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), 0); +} + +static const struct spm_funcs gfx_v10_0_spm_funcs = { + .start = &gfx_v10_0_spm_start, + .stop = &gfx_v10_0_spm_stop, + .set_rdptr = &gfx_v10_0_spm_set_rdptr, + .set_spm_perfmon_ring_buf = &gfx_v10_0_set_spm_perfmon_ring_buf, + .set_spm_config_size = 30, +}; + +static void gfx_v10_0_set_spm_funcs(struct amdgpu_device *adev) +{ + adev->gfx.spmfuncs = &gfx_v10_0_spm_funcs; +} + +static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; adev->gfx.funcs = &gfx_v10_0_gfx_funcs; @@ -7708,6 +7805,7 @@ static int gfx_v10_0_early_init(void *handle) adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); + gfx_v10_0_set_spm_funcs(adev); gfx_v10_0_set_kiq_pm4_funcs(adev); gfx_v10_0_set_ring_funcs(adev); gfx_v10_0_set_irq_funcs(adev); @@ -7721,15 +7819,19 @@ static int gfx_v10_0_early_init(void *handle) return gfx_v10_0_init_microcode(adev); } -static int gfx_v10_0_late_init(void *handle) +static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); if (r) return r; @@ -8610,7 +8712,11 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | PACKET3_RELEASE_MEM_GCR_GL2_WB | - PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ + PACKET3_RELEASE_MEM_GCR_GL2_INV | + PACKET3_RELEASE_MEM_GCR_GL2_US | + PACKET3_RELEASE_MEM_GCR_GL1_INV | + PACKET3_RELEASE_MEM_GCR_GLV_INV | + PACKET3_RELEASE_MEM_GCR_GLM_INV | PACKET3_RELEASE_MEM_GCR_GLM_WB | PACKET3_RELEASE_MEM_CACHE_POLICY(3) | PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | @@ -8948,7 +9054,9 @@ static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -9415,9 +9523,159 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } -static void gfx_v10_ip_print(void *handle, struct drm_printer 
*p) +static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + u64 addr; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + addr = amdgpu_bo_gpu_offset(ring->mqd_obj) + + offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active); + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (ring->pipe == 0) + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue); + else + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue); + + gfx_v10_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + 0, 1, 0x20); + gfx_v10_0_ring_emit_reg_wait(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete */ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + 
ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + return amdgpu_ring_test_ring(ring); +} + +static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); @@ -9479,9 +9737,9 @@ static void gfx_v10_ip_print(void *handle, struct drm_printer *p) } } -static void gfx_v10_ip_dump(void *handle) +static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); @@ -9546,6 +9804,32 @@ static void gfx_v10_ip_dump(void *handle) amdgpu_gfx_off_ctrl(adev, true); } +static int gfx_v10_0_spm_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 0); + break; + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 1); + break; + default: + break; + } + return 0; +} + +static int gfx_v10_0_spm_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + amdgpu_amdkfd_rlc_spm_interrupt(adev); + return 0; +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9618,6 +9902,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, + .reset = gfx_v10_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -9654,6 +9939,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, + .reset = gfx_v10_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -9723,6 +10009,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = { .process = gfx_v10_0_kiq_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v10_0_spm_irq_funcs = { + .set = gfx_v10_0_spm_set_interrupt_state, + .process = gfx_v10_0_spm_irq, +}; + static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -9731,6 +10022,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs; + adev->gfx.spm_irq.num_types = 1; + adev->gfx.spm_irq.funcs = &gfx_v10_0_spm_irq_funcs; + adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
index 22bb352786917..6b22128ab004b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1536,11 +1536,11 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) } } -static int gfx_v11_0_sw_init(void *handle) +static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { int i, j, k, r, ring_id = 0; int xcc_id = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): @@ -1732,10 +1732,10 @@ static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.rlc_autoload_ptr); } -static int gfx_v11_0_sw_fini(void *handle) +static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -1893,8 +1893,10 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + /* + * Initialize all compute VMIDs to have no GDS, GWS, or OA + * access. These should be enabled by FW for target VMIDs. + */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); @@ -3984,13 +3986,13 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -4026,7 +4028,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v11_0_gfx_init_queue(ring); + r = gfx_v11_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -4321,13 +4323,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v11_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -4391,7 +4393,7 @@ static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v11_0_kcq_init_queue(ring); + r = gfx_v11_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -4568,10 +4570,10 @@ static void gfx_v11_0_disable_gpa_mode(struct 
amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } -static int gfx_v11_0_hw_init(void *handle) +static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { if (adev->gfx.imu.funcs) { @@ -4665,9 +4667,9 @@ static int gfx_v11_0_hw_init(void *handle) return r; } -static int gfx_v11_0_hw_fini(void *handle) +static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -4703,14 +4705,14 @@ static int gfx_v11_0_hw_fini(void *handle) return 0; } -static int gfx_v11_0_suspend(void *handle) +static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v11_0_hw_fini(handle); + return gfx_v11_0_hw_fini(ip_block); } -static int gfx_v11_0_resume(void *handle) +static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v11_0_hw_init(handle); + return gfx_v11_0_hw_init(ip_block); } static bool gfx_v11_0_is_idle(void *handle) @@ -4724,11 +4726,11 @@ static bool gfx_v11_0_is_idle(void *handle) return true; } -static int gfx_v11_0_wait_for_idle(void *handle) +static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -4742,8 +4744,8 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - int req) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) { u32 i, tmp, val; @@ -4774,12 +4776,14 @@ static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, return 0; } -static int gfx_v11_0_soft_reset(void *handle) +static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0; u32 tmp; int r, i, j, k; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); @@ -4788,8 +4792,6 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { @@ -4814,8 +4816,10 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + mutex_lock(&adev->gfx.reset_sem_mutex); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; } @@ -4829,7 +4833,8 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); /* release the gfx mutex */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + r = gfx_v11_0_request_gfx_index_mutex(adev, false); + 
mutex_unlock(&adev->gfx.reset_sem_mutex); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; @@ -4897,15 +4902,15 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_unset_safe_mode(adev, 0); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return gfx_v11_0_cp_resume(adev); } -static bool gfx_v11_0_check_soft_reset(void *handle) +static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; long tmo = msecs_to_jiffies(1000); @@ -4926,12 +4931,13 @@ static bool gfx_v11_0_check_soft_reset(void *handle) return false; } -static int gfx_v11_0_post_soft_reset(void *handle) +static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; /** * GFX soft reset will impact MES, need resume MES when do GFX soft reset */ - return amdgpu_mes_resume((struct amdgpu_device *)handle); + return amdgpu_mes_resume(adev); } static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) @@ -4992,9 +4998,9 @@ static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } -static int gfx_v11_0_early_init(void *handle) +static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfx.funcs = &gfx_v11_0_gfx_funcs; @@ -5015,9 +5021,9 @@ static int gfx_v11_0_early_init(void *handle) return gfx_v11_0_init_microcode(adev); } -static int gfx_v11_0_late_init(void *handle) +static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -5920,6 +5926,9 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; + if (adev->enable_mes) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -6085,7 +6094,9 @@ static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -6538,9 +6549,102 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } -static void gfx_v11_ip_print(void *handle, struct drm_printer *p) +static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ring->adev; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) + return r; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v11_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + 
amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r = 0; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* make sure dequeue is complete */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v11_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kcq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); @@ -6602,9 +6706,9 @@ static void gfx_v11_ip_print(void *handle, struct drm_printer *p) } } -static void gfx_v11_ip_dump(void *handle) +static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); @@ -6739,6 +6843,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { @@ -6776,6 +6881,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, + .reset = gfx_v11_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h index 10cfc29c27c9a..157a5c812259d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h @@ -26,4 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block; +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 
f14e27f86e0eb..dc25fcb89b131 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,12 +202,16 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; -static const struct soc15_reg_golden golden_settings_gc_12_0[] = { +static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000), +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -1315,12 +1319,12 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) } } -static int gfx_v12_0_sw_init(void *handle) +static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) { int i, j, k, r, ring_id = 0; unsigned num_compute_rings; int xcc_id = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): @@ -1488,10 +1492,10 @@ static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.rlc_autoload_ptr); } -static int gfx_v12_0_sw_fini(void *handle) +static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -2916,13 +2920,13 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -static int gfx_v12_0_gfx_init_queue(struct amdgpu_ring *ring) +static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v12_gfx_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.gfx_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -2958,7 +2962,7 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v12_0_gfx_init_queue(ring); + r = gfx_v12_0_kgq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3054,7 +3058,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); @@ -3262,13 +3266,13 @@ static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring) +static int 
gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) { struct amdgpu_device *adev = ring->adev; struct v12_compute_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); @@ -3332,7 +3336,7 @@ static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v12_0_kcq_init_queue(ring); + r = gfx_v12_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3495,20 +3499,24 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + if (adev->rev_id == 0) soc15_program_register_sequence(adev, - golden_settings_gc_12_0, - (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + golden_settings_gc_12_0_rev0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0)); break; default: break; } } -static int gfx_v12_0_hw_init(void *handle) +static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { @@ -3595,9 +3603,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_hw_fini(void *handle) +static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3635,14 +3643,14 @@ static int gfx_v12_0_hw_fini(void *handle) return 0; } -static int gfx_v12_0_suspend(void *handle) +static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v12_0_hw_fini(handle); + return gfx_v12_0_hw_fini(ip_block); } -static int gfx_v12_0_resume(void *handle) +static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v12_0_hw_init(handle); + return gfx_v12_0_hw_init(ip_block); } static bool gfx_v12_0_is_idle(void *handle) @@ -3656,11 +3664,11 @@ static bool gfx_v12_0_is_idle(void *handle) return true; } -static int gfx_v12_0_wait_for_idle(void *handle) +static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -3687,9 +3695,9 @@ static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev) return clock; } -static int gfx_v12_0_early_init(void *handle) +static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfx.funcs = &gfx_v12_0_gfx_funcs; @@ -3709,9 +3717,9 @@ static int gfx_v12_0_early_init(void *handle) return gfx_v12_0_init_microcode(adev); } -static int gfx_v12_0_late_init(void *handle) +static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = 
(struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -4501,6 +4509,9 @@ static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; + if (adev->enable_mes) + return -EINVAL; + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -4617,7 +4628,9 @@ static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void @@ -5025,9 +5038,9 @@ static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } -static void gfx_v12_ip_print(void *handle, struct drm_printer *p) +static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0); @@ -5089,9 +5102,9 @@ static void gfx_v12_ip_print(void *handle, struct drm_printer *p) } } -static void gfx_v12_ip_dump(void *handle) +static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0); @@ -5155,6 +5168,93 @@ static void gfx_v12_ip_dump(void *handle) amdgpu_gfx_off_ctrl(adev, true); } +static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); + if (r) { + dev_err(adev->dev, "reset via MES failed %d\n", r); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_kgq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kgq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int r, i; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + soc24_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + DRM_ERROR("fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void 
**)&ring->mqd_ptr); + if (!r) { + r = gfx_v12_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + DRM_ERROR("fail to unresv mqd_obj\n"); + return r; + } + r = amdgpu_mes_map_legacy_queue(adev, ring); + if (r) { + dev_err(adev->dev, "failed to remap kcq\n"); + return r; + } + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v12_0_ip_funcs = { .name = "gfx_v12_0", .early_init = gfx_v12_0_early_init, @@ -5217,6 +5317,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, + .reset = gfx_v12_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { @@ -5251,6 +5352,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, + .reset = gfx_v12_0_reset_kcq, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 564f0b9336b6a..6ac6d4dfa49f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3023,9 +3023,9 @@ static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = { .start = gfx_v6_0_rlc_start }; -static int gfx_v6_0_early_init(void *handle) +static int gfx_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; @@ -3039,10 +3039,10 @@ static int gfx_v6_0_early_init(void *handle) return 0; } -static int gfx_v6_0_sw_init(void *handle) +static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r; r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); @@ -3107,10 +3107,10 @@ static int gfx_v6_0_sw_init(void *handle) return r; } -static int gfx_v6_0_sw_fini(void *handle) +static int gfx_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -3122,10 +3122,10 @@ static int gfx_v6_0_sw_fini(void *handle) return 0; } -static int gfx_v6_0_hw_init(void *handle) +static int gfx_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gfx_v6_0_constants_init(adev); @@ -3142,9 +3142,9 @@ static int gfx_v6_0_hw_init(void *handle) return r; } -static int gfx_v6_0_hw_fini(void *handle) +static int gfx_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gfx_v6_0_cp_enable(adev, false); adev->gfx.rlc.funcs->stop(adev); @@ -3153,18 +3153,14 @@ static int gfx_v6_0_hw_fini(void *handle) return 0; } -static int gfx_v6_0_suspend(void *handle) +static int gfx_v6_0_suspend(struct amdgpu_ip_block *ip_block) { - struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; - - return gfx_v6_0_hw_fini(adev); + return gfx_v6_0_hw_fini(ip_block); } -static int gfx_v6_0_resume(void *handle) +static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return gfx_v6_0_hw_init(adev); + return gfx_v6_0_hw_init(ip_block); } static bool gfx_v6_0_is_idle(void *handle) @@ -3177,20 +3173,20 @@ static bool gfx_v6_0_is_idle(void *handle) return true; } -static int gfx_v6_0_wait_for_idle(void *handle) +static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v6_0_is_idle(handle)) + if (gfx_v6_0_is_idle(adev)) return 0; udelay(1); } return -ETIMEDOUT; } -static int gfx_v6_0_soft_reset(void *handle) +static int gfx_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5fbdef04c9aae..77150c9f1e182 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2114,6 +2114,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2133,7 +2135,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); @@ -4131,9 +4134,9 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { .update_spm_vmid = gfx_v7_0_update_spm_vmid }; -static int gfx_v7_0_early_init(void *handle) +static int gfx_v7_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; @@ -4148,9 +4151,9 @@ static int gfx_v7_0_early_init(void *handle) return 0; } -static int gfx_v7_0_late_init(void *handle) +static int gfx_v7_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -4340,10 +4343,10 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, return 0; } -static int gfx_v7_0_sw_init(void *handle) +static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j, k, r, ring_id; switch (adev->asic_type) { @@ -4436,9 +4439,9 @@ static int gfx_v7_0_sw_init(void *handle) return r; } -static int gfx_v7_0_sw_fini(void *handle) +static int gfx_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) @@ -4462,10 +4465,10 @@ static int gfx_v7_0_sw_fini(void *handle) return 0; } -static int gfx_v7_0_hw_init(void *handle) +static int gfx_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gfx_v7_0_constants_init(adev); @@ -4483,9 +4486,9 @@ static int gfx_v7_0_hw_init(void *handle) return r; } -static int gfx_v7_0_hw_fini(void *handle) +static int gfx_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -4496,18 +4499,14 @@ static int gfx_v7_0_hw_fini(void *handle) return 0; } -static int gfx_v7_0_suspend(void *handle) +static int gfx_v7_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return gfx_v7_0_hw_fini(adev); + return gfx_v7_0_hw_fini(ip_block); } -static int gfx_v7_0_resume(void *handle) +static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return gfx_v7_0_hw_init(adev); + return gfx_v7_0_hw_init(ip_block); } static bool gfx_v7_0_is_idle(void *handle) @@ -4520,11 +4519,11 @@ static bool gfx_v7_0_is_idle(void *handle) return true; } -static int gfx_v7_0_wait_for_idle(void *handle) +static int gfx_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -4537,11 +4536,11 @@ static int gfx_v7_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v7_0_soft_reset(void *handle) +static int gfx_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0, srbm_soft_reset = 0; u32 tmp; - 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* GRBM_STATUS */ tmp = RREG32(mmGRBM_STATUS); @@ -4921,6 +4920,76 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4972,6 +5041,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, + .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a1963e6c5cab1..8f9f8e6d57a12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1894,12 +1894,12 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); -static int gfx_v8_0_sw_init(void *handle) +static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block) { int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (adev->asic_type) { case CHIP_TONGA: @@ -1921,6 +1921,12 @@ static int gfx_v8_0_sw_init(void 
*handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + /* SPM */ + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, + VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR, &adev->gfx.spm_irq); + if (r) + return r; + /* EOP Event */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); if (r) @@ -2037,9 +2043,9 @@ static int gfx_v8_0_sw_init(void *handle) return 0; } -static int gfx_v8_0_sw_fini(void *handle) +static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) @@ -4783,10 +4789,10 @@ static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) gfx_v8_0_cp_compute_enable(adev, enable); } -static int gfx_v8_0_hw_init(void *handle) +static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); @@ -4865,13 +4871,13 @@ static int gfx_v8_0_wait_for_rlc_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v8_0_wait_for_idle(void *handle) +static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v8_0_is_idle(handle)) + if (gfx_v8_0_is_idle(adev)) return 0; udelay(1); @@ -4879,9 +4885,9 @@ static int gfx_v8_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v8_0_hw_fini(void *handle) +static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -4889,6 +4895,7 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0); /* disable KCQ to avoid CPC touch memory not valid anymore */ gfx_v8_0_kcq_disable(adev); @@ -4897,8 +4904,9 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (!gfx_v8_0_wait_for_idle(adev)) + if (!gfx_v8_0_wait_for_idle(ip_block)) gfx_v8_0_cp_enable(adev, false); else pr_err("cp is busy, skip halt cp\n"); @@ -4911,19 +4919,19 @@ static int gfx_v8_0_hw_fini(void *handle) return 0; } -static int gfx_v8_0_suspend(void *handle) +static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v8_0_hw_fini(handle); + return gfx_v8_0_hw_fini(ip_block); } -static int gfx_v8_0_resume(void *handle) +static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v8_0_hw_init(handle); + return gfx_v8_0_hw_init(ip_block); } -static bool gfx_v8_0_check_soft_reset(void *handle) +static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset = 0, srbm_soft_reset = 0; u32 tmp; @@ -4983,9 +4991,9 @@ static bool gfx_v8_0_check_soft_reset(void *handle) } } -static int gfx_v8_0_pre_soft_reset(void *handle) +static int 
gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && @@ -5024,9 +5032,9 @@ static int gfx_v8_0_pre_soft_reset(void *handle) return 0; } -static int gfx_v8_0_soft_reset(void *handle) +static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset = 0, srbm_soft_reset = 0; u32 tmp; @@ -5086,9 +5094,9 @@ static int gfx_v8_0_soft_reset(void *handle) return 0; } -static int gfx_v8_0_post_soft_reset(void *handle) +static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && @@ -5254,15 +5262,107 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q }; -static int gfx_v8_0_early_init(void *handle) +static void gfx_v8_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | + (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v8_0_spm_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 0); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmGRBM_GFX_INDEX, data); + + + data = RREG32(mmRLC_SPM_PERFMON_CNTL); + data |= RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK; + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_PERFMON_CNTL, data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, + SPM_PERFMON_STATE, CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, + SPM_PERFMON_STATE, STRM_PERFMON_STATE_START_COUNTING); + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data); + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_INT_CNTL, 1); +} + +static void gfx_v8_0_spm_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, + PERFMON_STATE, CP_PERFMON_STATE_STOP_COUNTING); + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, + SPM_PERFMON_STATE, CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data); +} + +static void gfx_v8_0_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_RING_RDPTR, rptr); +} + +static void gfx_v8_0_set_spm_perfmon_ring_buf(struct amdgpu_device *adev, + u64 gpu_addr, u32 size) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, + mmRLC_SPM_PERFMON_RING_BASE_LO, lower_32_bits(gpu_addr)); + + 
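/* high half of the 64-bit SPM ring base; the low half was written above */
+	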
gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, + mmRLC_SPM_PERFMON_RING_BASE_HI, upper_32_bits(gpu_addr)); + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, + mmRLC_SPM_PERFMON_RING_SIZE, size); + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, + mmRLC_SPM_SEGMENT_THRESHOLD, 0xff); + + gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, 0); +} + +static const struct spm_funcs gfx_v8_0_spm_funcs = { + .start = &gfx_v8_0_spm_start, + .stop = &gfx_v8_0_spm_stop, + .set_rdptr = &gfx_v8_0_spm_set_rdptr, + .set_spm_perfmon_ring_buf = &gfx_v8_0_set_spm_perfmon_ring_buf, + .set_spm_config_size = 30, +}; + +static void gfx_v8_0_set_spm_funcs(struct amdgpu_device *adev) +{ + adev->gfx.spmfuncs = &gfx_v8_0_spm_funcs; +} + +static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v8_0_gfx_funcs; + gfx_v8_0_set_spm_funcs(adev); gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); gfx_v8_0_set_gds_init(adev); @@ -5271,9 +5371,9 @@ static int gfx_v8_0_early_init(void *handle) return 0; } -static int gfx_v8_0_late_init(void *handle) +static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -5303,6 +5403,10 @@ static int gfx_v8_0_late_init(void *handle) return r; } + r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0); + if (r) + return r; + return 0; } @@ -6149,6 +6253,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. @@ -6172,7 +6277,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EOP_TC_ACTION_EN | EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); @@ -6380,6 +6486,34 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6796,6 +6930,31 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static int gfx_v8_0_spm_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + WREG32(mmRLC_SPM_INT_CNTL, 0); + break; + case AMDGPU_IRQ_STATE_ENABLE: + WREG32(mmRLC_SPM_INT_CNTL, 1); + break; + default: + break; + } + return 0; +} + +static int gfx_v8_0_spm_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + amdgpu_amdkfd_rlc_spm_interrupt(adev); + return 0; +} /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT 0x0000007f @@ -6853,7 +7012,48 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) gfx_v8_0_emit_wave_limit_cs(ring, i, enable); } +} + +static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + return amdgpu_ring_test_ring(ring); } static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { @@ -6923,6 +7123,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, + .reset 
= gfx_v8_0_reset_kgq,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -7022,11 +7223,19 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
 	.process = gfx_v8_0_sq_irq,
 };
 
+static const struct amdgpu_irq_src_funcs gfx_v8_0_spm_irq_funcs = {
+	.set = gfx_v8_0_spm_set_interrupt_state,
+	.process = gfx_v8_0_spm_irq,
+};
+
 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
 
+	adev->gfx.spm_irq.num_types = 1;
+	adev->gfx.spm_irq.funcs = &gfx_v8_0_spm_irq_funcs;
+
 	adev->gfx.priv_reg_irq.num_types = 1;
 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ab10a05c7885a..89fda6a05fcd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -50,6 +50,7 @@
 #include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
+#include "gfx_v9_0_cleaner_shader.h"
 #include "gfx_v9_4_2.h"
 
 #include "asic_reg/pwr/pwr_10_0_offset.h"
@@ -893,10 +894,18 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
 					      unsigned int vmid);
+static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
 
 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
 				uint64_t queue_mask)
 {
+	struct amdgpu_device *adev = kiq_ring->adev;
+	u64 shader_mc_addr;
+
+	/* Cleaner shader MC address */
+	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 	amdgpu_ring_write(kiq_ring,
 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
@@ -906,8 +915,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
 		lower_32_bits(queue_mask));	/* queue mask lo */
 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 }
@@ -1004,12 +1013,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
 }
 
+
+static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+					uint32_t xcc_id, uint32_t vmid)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	unsigned i;
+
+	/* enter safe mode */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
+
+	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+		/* wait till dequeue takes effect */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "failed to wait for hqd to deactivate\n");
+	} else {
+		dev_err(adev->dev, 
"reset queue_type(%d) not supported\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -1301,6 +1345,10 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, + /* https://bbs.openkylin.top/t/topic/171497 */ + { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 }, + /* HP 705G4 DM with R5 2400G */ + { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 }, { 0, 0, 0, 0, 0 }, }; @@ -2150,12 +2198,12 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) } } -static int gfx_v9_0_sw_init(void *handle) +static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) { int i, j, k, r, ring_id; int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; unsigned int hw_prio; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -2174,9 +2222,34 @@ static int gfx_v9_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 2): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 88) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + /* SPM */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC, + GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT, + &adev->gfx.spm_irq); + if (r) + return r; + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -2336,14 +2409,18 @@ static int gfx_v9_0_sw_init(void *handle) gfx_v9_0_alloc_ip_dump(adev); + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + return 0; } -static int gfx_v9_0_sw_fini(void *handle) +static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) @@ -2360,6 +2437,8 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); amdgpu_gfx_kiq_fini(adev, 0); + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -2371,6 +2450,8 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + kfree(adev->gfx.ip_dump_core); 
kfree(adev->gfx.ip_dump_compute_queues); @@ -3124,6 +3205,15 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); @@ -3333,7 +3423,15 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); adev->gfx.kiq[0].ring.sched.ready = false; } udelay(50); @@ -3742,7 +3840,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -3754,8 +3852,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3819,7 +3917,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kcq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3854,6 +3952,10 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return r; } + if (adev->gfx.num_gfx_rings) + gfx_v9_0_cp_gfx_enable(adev, false); + gfx_v9_0_cp_compute_enable(adev, false); + r = gfx_v9_0_kiq_resume(adev); if (r) return r; @@ -3910,10 +4012,13 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) gfx_v9_0_cp_compute_enable(adev, enable); } -static int gfx_v9_0_hw_init(void *handle) +static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + amdgpu_gfx_cleaner_shader_init(adev, 
adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); if (!amdgpu_sriov_vf(adev)) gfx_v9_0_init_golden_registers(adev); @@ -3936,13 +4041,14 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_hw_fini(void *handle) +static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); @@ -3988,14 +4094,14 @@ static int gfx_v9_0_hw_fini(void *handle) return 0; } -static int gfx_v9_0_suspend(void *handle) +static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v9_0_hw_fini(handle); + return gfx_v9_0_hw_fini(ip_block); } -static int gfx_v9_0_resume(void *handle) +static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v9_0_hw_init(handle); + return gfx_v9_0_hw_init(ip_block); } static bool gfx_v9_0_is_idle(void *handle) @@ -4009,24 +4115,24 @@ static bool gfx_v9_0_is_idle(void *handle) return true; } -static int gfx_v9_0_wait_for_idle(void *handle) +static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v9_0_is_idle(handle)) + if (gfx_v9_0_is_idle(adev)) return 0; udelay(1); } return -ETIMEDOUT; } -static int gfx_v9_0_soft_reset(void *handle) +static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* GRBM_STATUS */ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); @@ -4682,9 +4788,97 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return r; } -static int gfx_v9_0_early_init(void *handle) +static void gfx_v9_0_spm_start(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = RREG32_SOC15(GC, 0, mmRLC_SPM_PERFMON_CNTL); + data |= RLC_SPM_PERFMON_CNTL__PERFMON_RING_MODE_MASK; + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_CNTL), data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE, + CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + /* When SPM is reset, RLC automatically resets wptr to 0. + * Manually reset rptr to match this. 
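+	 * Otherwise rptr and wptr would disagree about how much data is in the ring.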
+ */ + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), 0); + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 1); +} + +static void gfx_v9_0_spm_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + uint32_t data = 0; + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, SPM_PERFMON_STATE, + CP_PERFMON_STATE_STOP_COUNTING); + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + data = REG_SET_FIELD(0, CP_PERFMON_CNTL, PERFMON_STATE, + CP_PERFMON_STATE_DISABLE_AND_RESET); + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data); + + /* When SPM is reset, RLC automatically resets wptr to 0. + * Manually reset rptr to match this. + */ + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), 0); +} + +static void gfx_v9_0_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), rptr); +} + +static void gfx_v9_0_set_spm_perfmon_ring_buf(struct amdgpu_device *adev, u64 gpu_addr, u32 size) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, + mmRLC_SPM_PERFMON_RING_BASE_LO), lower_32_bits(gpu_addr)); + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, + mmRLC_SPM_PERFMON_RING_BASE_HI), upper_32_bits(gpu_addr)); + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE), size); + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_SEGMENT_THRESHOLD), 0xff); + + gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), 0); +} + +static const struct spm_funcs gfx_v9_0_spm_funcs = { + .start = &gfx_v9_0_spm_start, + .stop = &gfx_v9_0_spm_stop, + .set_rdptr = &gfx_v9_0_spm_set_rdptr, + .set_spm_perfmon_ring_buf = &gfx_v9_0_set_spm_perfmon_ring_buf, + .set_spm_config_size = 30, +}; + +static void gfx_v9_0_set_spm_funcs(struct amdgpu_device *adev) +{ + adev->gfx.spmfuncs = &gfx_v9_0_spm_funcs; +} + +static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; adev->gfx.funcs = &gfx_v9_0_gfx_funcs; @@ -4696,6 +4890,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); + gfx_v9_0_set_spm_funcs(adev); gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); @@ -4708,9 +4903,9 @@ static int gfx_v9_0_early_init(void *handle) return gfx_v9_0_init_microcode(adev); } -static int gfx_v9_0_ecc_late_init(void *handle) +static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; /* @@ -4742,15 +4937,19 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } -static int gfx_v9_0_late_init(void *handle) +static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); if (r) return r; + r = 
amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); if (r) return r; @@ -4759,7 +4958,7 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; - r = gfx_v9_0_ecc_late_init(handle); + r = gfx_v9_0_ecc_late_init(ip_block); if (r) return r; @@ -5870,7 +6069,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -7041,6 +7242,32 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ } +static int gfx_v9_0_spm_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 0); + break; + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 1); + break; + default: + break; + } + return 0; +} + +static int gfx_v9_0_spm_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + amdgpu_amdkfd_rlc_spm_interrupt(adev); + return 0; +} + static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, uint32_t pipe, bool enable) { @@ -7118,9 +7345,138 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_write(ring, ring->funcs->nop); } -static void gfx_v9_ip_print(void *handle, struct drm_printer *p) +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) + return -ENOMEM; + gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v9_0_ring_emit_reg_wait(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); + gfx_v9_0_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 
kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+				   0, 0);
+	amdgpu_ring_commit(kiq_ring);
+
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_ring_test_ring(kiq_ring);
+	if (r)
+		return r;
+
+	/* make sure dequeue is complete */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+			break;
+		udelay(1);
+	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+	if (r) {
+		dev_err(adev->dev, "failed to wait for hqd to deactivate\n");
+		return r;
+	}
+
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0)) {
+		dev_err(adev->dev, "failed to reserve mqd_obj\n");
+		return r;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+	if (!r) {
+		r = gfx_v9_0_kcq_init_queue(ring, true);
+		amdgpu_bo_kunmap(ring->mqd_obj);
+		ring->mqd_ptr = NULL;
+	}
+	amdgpu_bo_unreserve(ring->mqd_obj);
+	if (r) {
+		dev_err(adev->dev, "failed to restore kcq mqd\n");
+		return r;
+	}
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
+	if (r) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+	kiq->pmf->kiq_map_queues(kiq_ring, ring);
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+	r = amdgpu_ring_test_ring(kiq_ring);
+	if (r) {
+		DRM_ERROR("failed to remap queue\n");
+		return r;
+	}
+	return amdgpu_ring_test_ring(ring);
+}
+
+static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = ip_block->adev;
 	uint32_t i, j, k, reg, index = 0;
 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
@@ -7158,9 +7514,9 @@ static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
 
 }
 
-static void gfx_v9_ip_dump(void *handle)
+static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	uint32_t i, j, k, reg, index = 0;
 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
@@ -7200,6 +7556,13 @@ static void gfx_v9_ip_dump(void *handle)
 
 }
 
+static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+	/* Emit the cleaner shader */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
+}
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
 	.name = "gfx_v9_0",
 	.early_init = gfx_v9_0_early_init,
@@ -7249,7 +7612,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 		5 + /* HDP_INVL */
 		8 + 8 + /* FENCE x2 */
 		2 + /* SWITCH_BUFFER */
-		7, /* gfx_v9_0_emit_mem_sync */
+		7 + /* gfx_v9_0_emit_mem_sync */
+		2, /* gfx_v9_0_ring_emit_cleaner_shader */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -7270,6 +7634,10 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+	.reset = gfx_v9_0_reset_kgq,
+	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
+	.begin_use = 
amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7302,7 +7670,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7326,6 +7695,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_cntl = gfx_v9_0_ring_patch_cntl, .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7346,7 +7718,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7364,6 +7737,10 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, + .reset = gfx_v9_0_reset_kcq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -7436,12 +7813,19 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { .process = amdgpu_gfx_cp_ecc_error_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_spm_irq_funcs = { + .set = gfx_v9_0_spm_set_interrupt_state, + .process = gfx_v9_0_spm_irq, +}; static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; + adev->gfx.spm_irq.num_types = 1; + adev->gfx.spm_irq.funcs = &gfx_v9_0_spm_irq_funcs; + adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h new file mode 100644 index 0000000000000..0b6bd09b75299 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* Define the cleaner shader gfx_9_0 */ +static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = { + /* Add the cleaner shader code here */ +}; + +/* Define the cleaner shader gfx_9_4_2 */ +static const u32 gfx_9_4_2_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3f4fd2f08163d..e3ed568eaacc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -760,7 +760,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, for (i = first_vmid; i < last_vmid; i++) { data = 0; soc15_grbm_select(adev, 0, 0, 0, i, 0); - data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm new file mode 100644 index 0000000000000..35b8cf9070bd9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1.
+
+// MI200 : Clear SGPRs, VGPRs and LDS
+//   Uses two kernels launched separately:
+//   1. Clean VGPRs, LDS, and lower SGPRs
+//        Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+//        Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+//        Waves in the workgroup share the 64KB of LDS
+//        Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+//        Each wave clears 128 VGPRs, so all 512 in the SIMD
+//        The first wave of the workgroup clears its 64KB of LDS
+//        The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+//        before any wave in the workgroup can end. Without this, it is possible that not all SGPRs get cleared.
+//   2. Clean remaining SGPRs
+//        Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+//        Waves allocate 96 SGPRs each
+//        CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+//        As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+//        Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+//        Instead, the shader starts with an S_SETHALT 1. Once all waves are launched, CP will send an unhalt command.
+//        The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
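+//
+// Together the two kernels cover physical SGPRs 0-799: kernel 1 clears
+// 4 waves x 96 SGPRs = 384 registers (physical 0-383), and kernel 2 clears
+// 6 waves x 96 SGPRs = 576 registers (physical 224-799).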
+
+shader main
+	asic(MI200)
+	type(CS)
+	wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+	s_cmp_eq_u32 s0, 1				// Bit0 of sgpr0 is set by FW via COMPUTE_USER_DATA_3; when set, clear VGPRs and LDS
+	s_cbranch_scc0 label_0023			// Clean VGPRs and LDS if bit0 of sgpr0 is set, scc = (s0 == 1)
+	S_BARRIER
+
+	s_movk_i32 m0, 0x0000
+	s_mov_b32 s2, 0x00000078  // Loop 128/8=16 times  (loop unrolled for performance)
+	//
+	// CLEAR VGPRs
+	//
+	s_set_gpr_idx_on s2, 0x8  // enable Dest VGPR indexing
+label_0005:
+	v_mov_b32 v0, 0
+	v_mov_b32 v1, 0
+	v_mov_b32 v2, 0
+	v_mov_b32 v3, 0
+	v_mov_b32 v4, 0
+	v_mov_b32 v5, 0
+	v_mov_b32 v6, 0
+	v_mov_b32 v7, 0
+	s_sub_u32 s2, s2, 8
+	s_set_gpr_idx_idx s2
+	s_cbranch_scc0 label_0005
+	s_set_gpr_idx_off
+
+	//
+	//
+
+	s_mov_b32 s2, 0x80000000  // Bit31 is first_wave
+	s_and_b32 s2, s2, s1      // sgpr1 carries the tg_size (first_wave) term, as ucode only sets COMPUTE_PGM_RSRC2.tg_size_en
+	s_cbranch_scc0 label_clean_sgpr_1  // Clean LDS if it's the first wave of the ThreadGroup/WorkGroup
+	// CLEAR LDS
+	//
+	s_mov_b32 exec_lo, 0xffffffff
+	s_mov_b32 exec_hi, 0xffffffff
+	v_mbcnt_lo_u32_b32 v1, exec_hi, 0          // Set V1 to thread-ID (0..63)
+	v_mbcnt_hi_u32_b32 v1, exec_lo, v1         // Set V1 to thread-ID (0..63)
+	v_mul_u32_u24 v1, 0x00000008, v1           // * 8, so each thread is a double-dword address (8 bytes)
+	s_mov_b32 s2, 0x0000003f                   // 64 loop iterations
+	s_mov_b32 m0, 0xffffffff
+	// Clear all of LDS space
+	// Each FirstWave of a WorkGroup clears a 64KB block
+
+label_001F:
+	ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+	ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+	v_add_co_u32 v1, vcc, 0x00000400, v1
+	s_sub_u32 s2, s2, 1
+	s_cbranch_scc0 label_001F
+	//
+	// CLEAR SGPRs
+	//
+label_clean_sgpr_1:
+	s_mov_b32 m0, 0x0000005c  // Loop 96/4=24 times  (loop unrolled for performance)
+	s_nop 0
+label_sgpr_loop:
+	s_movreld_b32 s0, 0
+	s_movreld_b32 s1, 0
+	s_movreld_b32 s2, 0
+	s_movreld_b32 s3, 0
+	s_sub_u32 m0, m0, 4
+	s_cbranch_scc0 label_sgpr_loop
+
+	//clear vcc, flat scratch
+	s_mov_b32 flat_scratch_lo, 0	//clear flat scratch lo SGPR
+	s_mov_b32 flat_scratch_hi, 0	//clear flat scratch hi SGPR
+	s_mov_b64 vcc, 0		//clear vcc
+	s_mov_b64 ttmp0, 0		//Clear ttmp0 and ttmp1
+	s_mov_b64 ttmp2, 0		//Clear ttmp2 and ttmp3
+	s_mov_b64 ttmp4, 0		//Clear ttmp4 and ttmp5
+	s_mov_b64 ttmp6, 0		//Clear ttmp6 and ttmp7
+	s_mov_b64 ttmp8, 0		//Clear ttmp8 and ttmp9
+	s_mov_b64 ttmp10, 0		//Clear ttmp10 and ttmp11
+	s_mov_b64 ttmp12, 0		//Clear ttmp12 and ttmp13
+	s_mov_b64 ttmp14, 0		//Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+	s_sethalt 1
+
+	s_mov_b32 m0, 0x0000005c  // Loop 96/4=24 times  (loop unrolled for performance)
+	s_nop 0
+label_sgpr_loop1:
+
+	s_movreld_b32 s0, 0
+	s_movreld_b32 s1, 0
+	s_movreld_b32 s2, 0
+	s_movreld_b32 s3, 0
+	s_sub_u32 m0, m0, 4
+	s_cbranch_scc0 label_sgpr_loop1
+
+	//clear vcc, flat scratch
+	s_mov_b32 flat_scratch_lo, 0	//clear flat scratch lo SGPR
+	s_mov_b32 flat_scratch_hi, 0	//clear flat scratch hi SGPR
+	s_mov_b64 vcc, 0xee		//clear vcc
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 59417feac9a5d..4d5b48dbc0057 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -37,6 +37,7 @@
 #include 
"gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "gfx_v9_4_3_cleaner_shader.h" #include "amdgpu_xcp.h" #include "amdgpu_aca.h" @@ -75,42 +76,11 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_BASE), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_RB2_WPTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_IB2_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), - SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), - SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), @@ -122,11 +92,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), @@ -139,11 +106,8 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, regCP_CE_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), @@ -151,6 +115,47 @@ static const struct 
amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) }; +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -159,10 +164,18 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -172,8 +185,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + 
amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 }
@@ -270,12 +283,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
 }
 
+static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+					  uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+					  uint32_t xcc_id, uint32_t vmid)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	unsigned i;
+
+	/* enter safe mode */
+	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id);
+
+	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2);
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1);
+		/* wait till dequeue takes effect */
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+		if (i >= adev->usec_timeout)
+			dev_err(adev->dev, "failed to wait for hqd to deactivate\n");
+	} else {
+		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+	}
+
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	/* exit safe mode */
+	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
+}
+
 static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
 	.kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
 	.kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
 	.kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
 	.kiq_query_status = gfx_v9_4_3_kiq_query_status,
 	.kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+	.kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue,
 	.set_resources_size = 8,
 	.map_queues_size = 7,
 	.unmap_queues_size = 6,
@@ -976,7 +1023,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
 static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
 {
 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3);
-	uint32_t *ptr, num_xcc;
+	uint32_t *ptr, num_xcc, inst;
 
 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
@@ -987,12 +1034,44 @@ static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
 	} else {
 		adev->gfx.ip_dump_core = ptr;
 	}
+
+	/* Allocate memory for compute queue registers for all the instances */
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
+	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+		adev->gfx.mec.num_queue_per_pipe;
+
+	ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL);
+	if (!ptr) {
+		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+		adev->gfx.ip_dump_compute_queues = NULL;
+	} else {
+		adev->gfx.ip_dump_compute_queues = ptr;
+	}
 }
 
-static int gfx_v9_4_3_sw_init(void *handle)
+static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
 {
 	int i, j, k, r, ring_id, xcc_id, num_xcc;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+		adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex;
+		adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex);
+		if (adev->gfx.mec_fw_version >= 153) {
+			adev->gfx.enable_cleaner_shader = true;
+			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+			if (r) {
+				
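/* fall back to running without the cleaner shader */
+				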
adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -1086,19 +1165,23 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; - - if (!amdgpu_sriov_vf(adev)) - r = amdgpu_gfx_sysfs_init(adev); + r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; gfx_v9_4_3_alloc_ip_dump(adev); - return r; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + + return 0; } -static int gfx_v9_4_3_sw_fini(void *handle) +static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block) { int i, num_xcc; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) @@ -1110,13 +1193,16 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev, i); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); - if (!amdgpu_sriov_vf(adev)) - amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); return 0; } @@ -1157,8 +1243,10 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev, soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); - /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + /* + * Initialize all compute VMIDs to have no GDS, GWS, or OA + * access. These should be enabled by FW for target VMIDs. 
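+ *
+ * Sketch of the register layout behind the loop below: each VMID owns a
+ * BASE/SIZE register pair at a stride of 2 dwords, so clearing VMID i
+ * amounts to (simplified from the WREG32_SOC15_OFFSET() calls):
+ *
+ *	WREG32(regGDS_VMID0_BASE + 2 * i, 0);
+ *	WREG32(regGDS_VMID0_SIZE + 2 * i, 0);
+ *
+ * for every KFD VMID in [first_kfd_vmid, AMDGPU_NUM_VMID).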
+ */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0); @@ -1611,7 +1699,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); } else { WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, - (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | + CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | + CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); adev->gfx.kiq[xcc_id].ring.sched.ready = false; } udelay(50); @@ -2025,7 +2121,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -2037,8 +2133,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2123,7 +2219,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -2150,6 +2246,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); if (r) return r; + } else { + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); @@ -2209,12 +2307,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, - int xcc_id) -{ - gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); -} - static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) { if (amdgpu_gfx_disable_kcq(adev, xcc_id)) @@ -2246,13 +2338,16 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) } gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); - gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); } -static int gfx_v9_4_3_hw_init(void *handle) +static int gfx_v9_4_3_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); if (!amdgpu_sriov_vf(adev)) gfx_v9_4_3_init_golden_registers(adev); @@ -2270,9 +2365,9 @@ static 
int gfx_v9_4_3_hw_init(void *handle) return r; } -static int gfx_v9_4_3_hw_fini(void *handle) +static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, num_xcc; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -2287,14 +2382,14 @@ static int gfx_v9_4_3_hw_fini(void *handle) return 0; } -static int gfx_v9_4_3_suspend(void *handle) +static int gfx_v9_4_3_suspend(struct amdgpu_ip_block *ip_block) { - return gfx_v9_4_3_hw_fini(handle); + return gfx_v9_4_3_hw_fini(ip_block); } -static int gfx_v9_4_3_resume(void *handle) +static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block) { - return gfx_v9_4_3_hw_init(handle); + return gfx_v9_4_3_hw_init(ip_block); } static bool gfx_v9_4_3_is_idle(void *handle) @@ -2311,24 +2406,24 @@ static bool gfx_v9_4_3_is_idle(void *handle) return true; } -static int gfx_v9_4_3_wait_for_idle(void *handle) +static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v9_4_3_is_idle(handle)) + if (gfx_v9_4_3_is_idle(adev)) return 0; udelay(1); } return -ETIMEDOUT; } -static int gfx_v9_4_3_soft_reset(void *handle) +static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block) { u32 grbm_soft_reset = 0; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* GRBM_STATUS */ tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS); @@ -2412,9 +2507,9 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } -static int gfx_v9_4_3_early_init(void *handle) +static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); @@ -2430,9 +2525,9 @@ static int gfx_v9_4_3_early_init(void *handle) return gfx_v9_4_3_init_microcode(adev); } -static int gfx_v9_4_3_late_init(void *handle) +static int gfx_v9_4_3_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); @@ -2963,7 +3058,9 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); } static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( @@ -3371,6 +3468,180 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me, + uint32_t pipe, uint32_t queue, + uint32_t xcc_id) +{ + int i, r; + /* make sure dequeue is complete*/ + gfx_v9_4_3_xcc_set_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me, pipe, queue, 0, GET_INST(GC, xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + 
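/*
 * This loop is the recurring HQD teardown handshake in this file: select
 * the queue via SRBM, then spin on bit 0 of CP_HQD_ACTIVE for at most
 * adev->usec_timeout microseconds. The same pattern as a standalone
 * sketch, with a hypothetical hqd_active() register accessor:
 *
 *	static int wait_hqd_idle(struct amdgpu_device *adev)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; i < adev->usec_timeout; i++) {
 *			if (!(hqd_active() & 1))
 *				return 0;	// queue has dequeued
 *			udelay(1);
 *		}
 *		return -ETIMEDOUT;	// caller may escalate to a pipe reset
 *	}
 */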
	udelay(1);
+	}
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	else
+		r = 0;
+	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v9_4_3_xcc_unset_safe_mode(adev, xcc_id);
+
+	return r;
+
+}
+
+static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
+{
+	/* TODO: Check whether the gfx9.4.4 MEC firmware supports pipe reset as well. */
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+	    adev->gfx.mec_fw_version >= 0x0000009b)
+		return true;
+	else
+		dev_warn_once(adev->dev, "MEC firmware may not support pipe reset; please update to the latest MEC firmware\n");
+
+	return false;
+}
+
+static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t reset_pipe, clean_pipe;
+	int r;
+
+	if (!gfx_v9_4_3_pipe_reset_support(adev))
+		return -EINVAL;
+
+	gfx_v9_4_3_xcc_set_safe_mode(adev, ring->xcc_id);
+	mutex_lock(&adev->srbm_mutex);
+
+	reset_pipe = RREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL);
+	clean_pipe = reset_pipe;
+
+	if (ring->me == 1) {
+		switch (ring->pipe) {
+		case 0:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME1_PIPE0_RESET, 1);
+			break;
+		case 1:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME1_PIPE1_RESET, 1);
+			break;
+		case 2:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME1_PIPE2_RESET, 1);
+			break;
+		case 3:
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME1_PIPE3_RESET, 1);
+			break;
+		default:
+			break;
+		}
+	} else {
+		if (ring->pipe)
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME2_PIPE1_RESET, 1);
+		else
+			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+						   MEC_ME2_PIPE0_RESET, 1);
+	}
+
+	WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, reset_pipe);
+	WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regCP_MEC_CNTL, clean_pipe);
+	mutex_unlock(&adev->srbm_mutex);
+	gfx_v9_4_3_xcc_unset_safe_mode(adev, ring->xcc_id);
+
+	r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+	return r;
+}
+
+static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
+				unsigned int vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id];
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+	int r;
+
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+				   0, 0);
+	amdgpu_ring_commit(kiq_ring);
+
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_ring_test_ring(kiq_ring);
+	if (r) {
+		dev_err(adev->dev, "kiq ring test failed after ring %s queue reset\n",
+			ring->name);
+		goto pipe_reset;
+	}
+
+	r = gfx_v9_4_3_unmap_done(adev, ring->me, ring->pipe, ring->queue, ring->xcc_id);
+	if (r)
+		dev_err(adev->dev, "failed to wait for HQD deactivation, trying pipe reset\n");
+
+pipe_reset:
+	if (r) {
+		r = gfx_v9_4_3_reset_hw_pipe(ring);
+		dev_info(adev->dev, "ring: %s pipe reset: %s\n", ring->name,
"failed" : "successfully"); + if (r) + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + dev_err(adev->dev, "fail to remap queue\n"); + return r; + } + return amdgpu_ring_test_ring(ring); +} + enum amdgpu_gfx_cp_ras_mem_id { AMDGPU_GFX_CP_MEM1 = 1, AMDGPU_GFX_CP_MEM2, @@ -4304,11 +4575,12 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no amdgpu_ring_write(ring, ring->funcs->nop); } -static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) +static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + struct amdgpu_device *adev = ip_block->adev; + uint32_t i, j, k; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t num_xcc, reg, num_inst; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4324,13 +4596,51 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) gc_reg_list_9_4_3[i].reg_name, adev->gfx.ip_dump_core[xcc_offset + i]); } + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n", + num_xcc, + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, + "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", + xcc_id, i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + } + inst_offset += reg_count; + } + } + } + } } -static void gfx_v9_4_3_ip_dump(void *handle) +static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; - uint32_t xcc_id, xcc_offset, num_xcc; + struct amdgpu_device *adev = ip_block->adev; + uint32_t i, j, k; + uint32_t num_xcc, reg, num_inst; + uint32_t xcc_id, xcc_offset, inst_offset; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); if (!adev->gfx.ip_dump_core) @@ -4347,6 +4657,49 @@ static void gfx_v9_4_3_ip_dump(void *handle) GET_INST(GC, xcc_id))); } amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for 
all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, + GET_INST(GC, xcc_id)); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); + } + inst_offset += reg_count; + } + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + +static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { @@ -4388,7 +4741,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_4_3_emit_mem_sync */ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, .emit_fence = gfx_v9_4_3_ring_emit_fence, @@ -4406,6 +4760,10 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, + .reset = gfx_v9_4_3_reset_kcq, + .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm new file mode 100644 index 0000000000000..d5325ef80ab02 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this entry point to main and the lower shader main to main_1.
+
+// MI300 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+//    Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+//    Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+//    Waves in the workgroup share the 64KB of LDS
+//    Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+//    Each wave clears 128 VGPRs, so all 512 in the SIMD
+//    The first wave of the workgroup clears its 64KB of LDS
+//    The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+//    before any wave in the workgroup can end. Without this, it is possible that not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+//    Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+//    Each wave allocates 96 SGPRs
+//    CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+//    As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+//    Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+//    Instead, the shader starts with an S_SETHALT 1.
+//    Once all waves are launched, CP will send the unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+  asic(MI300)
+  type(CS)
+  wave_size(64)
+// Note: original source code from SQ team
+
+// (theoretical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+  s_cmp_eq_u32 s0, 1                          // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+  s_cbranch_scc0  label_0023                  // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1)
+  S_BARRIER
+
+  s_movk_i32    m0, 0x0000
+  s_mov_b32     s2, 0x00000078                // Loop 128/8=16 times  (loop unrolled for performance)
+  //
+  // CLEAR VGPRs
+  //
+  s_set_gpr_idx_on  s2, 0x8                   // enable Dest VGPR indexing
+label_0005:
+  v_mov_b32     v0, 0
+  v_mov_b32     v1, 0
+  v_mov_b32     v2, 0
+  v_mov_b32     v3, 0
+  v_mov_b32     v4, 0
+  v_mov_b32     v5, 0
+  v_mov_b32     v6, 0
+  v_mov_b32     v7, 0
+  s_sub_u32     s2, s2, 8
+  s_set_gpr_idx_idx  s2
+  s_cbranch_scc0  label_0005
+  s_set_gpr_idx_off
+
+  //
+  //
+
+  s_mov_b32     s2, 0x80000000                // Bit31 is first_wave
+  s_and_b32     s2, s2, s1                    // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+  s_cbranch_scc0  label_clean_sgpr_1          // Clean LDS if it's the first wave of the ThreadGroup/WorkGroup
+  // CLEAR LDS
+  //
+  s_mov_b32 exec_lo, 0xffffffff
+  s_mov_b32 exec_hi, 0xffffffff
+  v_mbcnt_lo_u32_b32  v1, exec_hi, 0          // Set V1 to thread-ID (0..63)
+  v_mbcnt_hi_u32_b32  v1, exec_lo, v1         // Set V1 to thread-ID (0..63)
+  v_mul_u32_u24  v1, 0x00000008, v1           // * 8, so each thread is a double-dword address (8 bytes)
+  s_mov_b32     s2, 0x00000003f               // 64 loop iterations
+  s_mov_b32     m0, 0xffffffff
+  // Clear all of LDS space
+  // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+  ds_write2_b64  v1, v[2:3], v[2:3] offset1:32
+  ds_write2_b64  v1, v[4:5], v[4:5] offset0:64 offset1:96
+  v_add_co_u32  v1, vcc, 0x00000400, v1
+  s_sub_u32     s2, s2, 1
+  s_cbranch_scc0  label_001F
+
+  //
+  // CLEAR SGPRs
+  //
+label_clean_sgpr_1:
+  s_mov_b32     m0, 0x0000005c                // Loop 96/4=24 times  (loop unrolled for performance)
+  s_nop 0
+label_sgpr_loop:
+  s_movreld_b32  s0, 0
+  s_movreld_b32  s1, 0
+  s_movreld_b32  s2, 0
+  s_movreld_b32  s3, 0
+  s_sub_u32      m0, m0, 4
+  s_cbranch_scc0  label_sgpr_loop
+
+  //clear vcc, flat scratch
+  s_mov_b32     flat_scratch_lo, 0            //clear flat scratch lo SGPR
+  s_mov_b32     flat_scratch_hi, 0            //clear flat scratch hi SGPR
+  s_mov_b64     vcc, 0                        //clear vcc
+  s_mov_b64     ttmp0, 0                      //Clear ttmp0 and ttmp1
+  s_mov_b64     ttmp2, 0                      //Clear ttmp2 and ttmp3
+  s_mov_b64     ttmp4, 0                      //Clear ttmp4 and ttmp5
+  s_mov_b64     ttmp6, 0                      //Clear ttmp6 and ttmp7
+  s_mov_b64     ttmp8, 0                      //Clear ttmp8 and ttmp9
+  s_mov_b64     ttmp10, 0                     //Clear ttmp10 and ttmp11
+  s_mov_b64     ttmp12, 0                     //Clear ttmp12 and ttmp13
+  s_mov_b64     ttmp14, 0                     //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+  s_sethalt 1
+
+  s_mov_b32     m0, 0x0000005c                // Loop 96/4=24 times  (loop unrolled for performance)
+  s_nop 0
+label_sgpr_loop1:
+
+  s_movreld_b32  s0, 0
+  s_movreld_b32  s1, 0
+  s_movreld_b32  s2, 0
+  s_movreld_b32  s3, 0
+  s_sub_u32      m0, m0, 4
+  s_cbranch_scc0  label_sgpr_loop1
+
+  //clear vcc, flat scratch
+  s_mov_b32     flat_scratch_lo, 0            //clear flat scratch lo SGPR
+  s_mov_b32     flat_scratch_hi, 0            //clear flat scratch hi SGPR
+  s_mov_b64     vcc, 0xee                     //clear vcc
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
new file mode 100644
index 0000000000000..69aa567c6c1d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h
@@ -0,0 +1,64 @@
+/*
SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_9_4_3 */ +static const u32 gfx_9_4_3_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d200310d17319..0e3ddea7b8e0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,23 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } -static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) - return false; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, @@ -468,5 +451,4 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 72109abe7c86c..ed8e130c7d195 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -622,22 +622,6 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } -static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 fed, status; - - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -646,7 +630,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f0ceab3ce5bfa..2f1af50aae1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } @@ -149,7 +150,6 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, entry->vmid_src ? 
AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); } @@ -629,9 +629,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) } -static int gmc_v10_0_early_init(void *handle) +static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v10_0_set_mmhub_funcs(adev); gmc_v10_0_set_gfxhub_funcs(adev); @@ -650,9 +650,9 @@ static int gmc_v10_0_early_init(void *handle) return 0; } -static int gmc_v10_0_late_init(void *handle) +static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_gmc_allocate_vm_inv_eng(adev); @@ -676,7 +676,6 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) @@ -768,10 +767,10 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } -static int gmc_v10_0_sw_init(void *handle) +static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->gfxhub.funcs->init(adev); @@ -919,9 +918,9 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_table_vram_free(adev); } -static int gmc_v10_0_sw_fini(void *handle) +static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_vm_manager_fini(adev); gmc_v10_0_gart_fini(adev); @@ -984,9 +983,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) return 0; } -static int gmc_v10_0_hw_init(void *handle) +static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode; @@ -1031,9 +1030,9 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) adev->mmhub.funcs->gart_disable(adev); } -static int gmc_v10_0_hw_fini(void *handle) +static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v10_0_gart_disable(adev); @@ -1052,25 +1051,22 @@ static int gmc_v10_0_hw_fini(void *handle) return 0; } -static int gmc_v10_0_suspend(void *handle) +static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v10_0_hw_fini(adev); + gmc_v10_0_hw_fini(ip_block); return 0; } -static int gmc_v10_0_resume(void *handle) +static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v10_0_hw_init(adev); + r = gmc_v10_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -1081,13 +1077,13 @@ static bool gmc_v10_0_is_idle(void *handle) return true; } -static int 
gmc_v10_0_wait_for_idle(void *handle) +static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* There is no need to wait for MC idle in GMC v10.*/ return 0; } -static int gmc_v10_0_soft_reset(void *handle) +static int gmc_v10_0_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 2797fd84432b2..82b905271547f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -120,7 +120,6 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); } @@ -601,9 +600,9 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) } } -static int gmc_v11_0_early_init(void *handle) +static int gmc_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v11_0_set_gfxhub_funcs(adev); gmc_v11_0_set_mmhub_funcs(adev); @@ -622,9 +621,9 @@ static int gmc_v11_0_early_init(void *handle) return 0; } -static int gmc_v11_0_late_init(void *handle) +static int gmc_v11_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_gmc_allocate_vm_inv_eng(adev); @@ -645,7 +644,6 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); - amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); if (!amdgpu_sriov_vf(adev) && @@ -729,10 +727,10 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } -static int gmc_v11_0_sw_init(void *handle) +static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->init(adev); @@ -849,9 +847,9 @@ static void gmc_v11_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_table_vram_free(adev); } -static int gmc_v11_0_sw_fini(void *handle) +static int gmc_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_vm_manager_fini(adev); gmc_v11_0_gart_fini(adev); @@ -908,9 +906,9 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return 0; } -static int gmc_v11_0_hw_init(void *handle) +static int gmc_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode; @@ -940,9 +938,9 @@ static void gmc_v11_0_gart_disable(struct amdgpu_device *adev) adev->mmhub.funcs->gart_disable(adev); } -static int gmc_v11_0_hw_fini(void *handle) +static int gmc_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ @@ -961,25 +959,22 @@ static int 
gmc_v11_0_hw_fini(void *handle) return 0; } -static int gmc_v11_0_suspend(void *handle) +static int gmc_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v11_0_hw_fini(adev); + gmc_v11_0_hw_fini(ip_block); return 0; } -static int gmc_v11_0_resume(void *handle) +static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v11_0_hw_init(adev); + r = gmc_v11_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -990,13 +985,13 @@ static bool gmc_v11_0_is_idle(void *handle) return true; } -static int gmc_v11_0_wait_for_idle(void *handle) +static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* There is no need to wait for MC idle in GMC v11.*/ return 0; } -static int gmc_v11_0_soft_reset(void *handle) +static int gmc_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index edcb5351f8cca..dcc44e688175e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -498,9 +498,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, uint64_t *flags) { struct amdgpu_bo *bo = mapping->bo_va->base.bo; - struct amdgpu_device *bo_adev; - bool coherent, is_system; - *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -516,25 +513,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (!bo) - return; - - if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | - AMDGPU_GEM_CREATE_UNCACHED)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); - - bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); - coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; - /* WA for HW bug */ - if (is_system || ((bo_adev != adev) && coherent)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); - + if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); } static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -604,9 +587,9 @@ static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev) } } -static int gmc_v12_0_early_init(void *handle) +static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v12_0_set_gfxhub_funcs(adev); gmc_v12_0_set_mmhub_funcs(adev); @@ -624,9 +607,9 @@ static int gmc_v12_0_early_init(void *handle) return 0; } -static int gmc_v12_0_late_init(void *handle) +static int gmc_v12_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_gmc_allocate_vm_inv_eng(adev); @@ -731,10 +714,10 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } -static int gmc_v12_0_sw_init(void *handle) +static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; 
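/*
 * The same mechanical conversion repeats for every hook in this series:
 * IP-block callbacks now take their struct amdgpu_ip_block directly
 * instead of an opaque handle. The shape of the change, sketched:
 *
 *	// before: static int hook(void *handle)
 *	//	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 *	// after:  static int hook(struct amdgpu_ip_block *ip_block)
 *	//	struct amdgpu_device *adev = ip_block->adev;
 */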
adev->mmhub.funcs->init(adev); @@ -841,9 +824,9 @@ static void gmc_v12_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_table_vram_free(adev); } -static int gmc_v12_0_sw_fini(void *handle) +static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_vm_manager_fini(adev); gmc_v12_0_gart_fini(adev); @@ -894,10 +877,10 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return 0; } -static int gmc_v12_0_hw_init(void *handle) +static int gmc_v12_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* The sequence of these two function calls matters.*/ gmc_v12_0_init_golden_registers(adev); @@ -924,9 +907,9 @@ static void gmc_v12_0_gart_disable(struct amdgpu_device *adev) adev->mmhub.funcs->gart_disable(adev); } -static int gmc_v12_0_hw_fini(void *handle) +static int gmc_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ @@ -945,25 +928,22 @@ static int gmc_v12_0_hw_fini(void *handle) return 0; } -static int gmc_v12_0_suspend(void *handle) +static int gmc_v12_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v12_0_hw_fini(adev); + gmc_v12_0_hw_fini(ip_block); return 0; } -static int gmc_v12_0_resume(void *handle) +static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v12_0_hw_init(adev); + r = gmc_v12_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -974,13 +954,13 @@ static bool gmc_v12_0_is_idle(void *handle) return true; } -static int gmc_v12_0_wait_for_idle(void *handle) +static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* There is no need to wait for MC idle in GMC v11.*/ return 0; } -static int gmc_v12_0_soft_reset(void *handle) +static int gmc_v12_0_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index d36725666b54c..b66707f7d5941 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -43,7 +43,7 @@ static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev); -static int gmc_v6_0_wait_for_idle(void *handle); +static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block); MODULE_FIRMWARE("amdgpu/tahiti_mc.bin"); MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin"); @@ -64,8 +64,13 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin"); static void gmc_v6_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; + struct amdgpu_ip_block *ip_block; - gmc_v6_0_wait_for_idle((void *)adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + + gmc_v6_0_wait_for_idle(ip_block); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { @@ -205,7 +210,6 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; - amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); 
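/*
 * Worked example for the base computed above: MC_VM_FB_LOCATION stores
 * the framebuffer base in 16 MB units, hence the shift by 24. Assuming a
 * register value of 0x00F4:
 *
 *	base = (0x00F4 & 0xFFFF) << 24;	/* 0xF4 * 16 MB = 0xF4000000 */
 */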
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -213,6 +217,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, static void gmc_v6_0_mc_program(struct amdgpu_device *adev) { int i, j; + struct amdgpu_ip_block *ip_block; + /* Initialize HDP */ for (i = 0, j = 0; i < 32; i++, j += 0x6) { @@ -224,7 +230,11 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v6_0_wait_for_idle((void *)adev)) + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + + if (gmc_v6_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); if (adev->mode_info.num_crtc) { @@ -251,7 +261,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); - if (gmc_v6_0_wait_for_idle((void *)adev)) + if (gmc_v6_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } @@ -762,9 +772,9 @@ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type) } } -static int gmc_v6_0_early_init(void *handle) +static int gmc_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v6_0_set_gmc_funcs(adev); gmc_v6_0_set_irq_funcs(adev); @@ -772,9 +782,9 @@ static int gmc_v6_0_early_init(void *handle) return 0; } -static int gmc_v6_0_late_init(void *handle) +static int gmc_v6_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); @@ -799,10 +809,10 @@ static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } -static int gmc_v6_0_sw_init(void *handle) +static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); @@ -876,9 +886,9 @@ static int gmc_v6_0_sw_init(void *handle) return 0; } -static int gmc_v6_0_sw_fini(void *handle) +static int gmc_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -889,10 +899,10 @@ static int gmc_v6_0_sw_fini(void *handle) return 0; } -static int gmc_v6_0_hw_init(void *handle) +static int gmc_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v6_0_mc_program(adev); @@ -914,9 +924,9 @@ static int gmc_v6_0_hw_init(void *handle) return 0; } -static int gmc_v6_0_hw_fini(void *handle) +static int gmc_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v6_0_gart_disable(adev); @@ -924,21 +934,19 @@ static int gmc_v6_0_hw_fini(void *handle) return 0; } -static int gmc_v6_0_suspend(void *handle) +static int gmc_v6_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v6_0_hw_fini(adev); + 
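/*
 * Every GMC generation converted in this series keeps suspend/resume as
 * thin wrappers; only the parameter type changes. The common shape,
 * sketched for a generic gmc_vX_0 (hypothetical name):
 *
 *	static int gmc_vX_0_suspend(struct amdgpu_ip_block *ip_block)
 *	{
 *		gmc_vX_0_hw_fini(ip_block);
 *		return 0;
 *	}
 *
 *	static int gmc_vX_0_resume(struct amdgpu_ip_block *ip_block)
 *	{
 *		int r = gmc_vX_0_hw_init(ip_block);
 *
 *		if (r)
 *			return r;
 *		amdgpu_vmid_reset_all(ip_block->adev);	// drop stale VMIDs
 *		return 0;
 *	}
 */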
gmc_v6_0_hw_fini(ip_block); return 0; } -static int gmc_v6_0_resume(void *handle) +static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; - r = gmc_v6_0_hw_init(adev); + r = gmc_v6_0_hw_init(ip_block); if (r) return r; @@ -950,6 +958,7 @@ static int gmc_v6_0_resume(void *handle) static bool gmc_v6_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | @@ -959,13 +968,13 @@ static bool gmc_v6_0_is_idle(void *handle) return true; } -static int gmc_v6_0_wait_for_idle(void *handle) +static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gmc_v6_0_is_idle(handle)) + if (gmc_v6_0_is_idle(adev)) return 0; udelay(1); } @@ -973,9 +982,9 @@ static int gmc_v6_0_wait_for_idle(void *handle) } -static int gmc_v6_0_soft_reset(void *handle) +static int gmc_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -992,7 +1001,8 @@ static int gmc_v6_0_soft_reset(void *handle) if (srbm_soft_reset) { gmc_v6_0_mc_stop(adev); - if (gmc_v6_0_wait_for_idle(adev)) + + if (gmc_v6_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); tmp = RREG32(mmSRBM_SOFT_RESET); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 994432fb57eaf..e2331a5aac0ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -52,7 +52,7 @@ static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); -static int gmc_v7_0_wait_for_idle(void *handle); +static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block); MODULE_FIRMWARE("amdgpu/bonaire_mc.bin"); MODULE_FIRMWARE("amdgpu/hawaii_mc.bin"); @@ -236,7 +236,6 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; - amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -921,9 +920,9 @@ static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type) } } -static int gmc_v7_0_early_init(void *handle) +static int gmc_v7_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v7_0_set_gmc_funcs(adev); gmc_v7_0_set_irq_funcs(adev); @@ -940,9 +939,9 @@ static int gmc_v7_0_early_init(void *handle) return 0; } -static int gmc_v7_0_late_init(void *handle) +static int gmc_v7_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); @@ -968,10 +967,10 @@ static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } -static int gmc_v7_0_sw_init(void *handle) +static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); @@ -1060,9 +1059,9 @@ static int gmc_v7_0_sw_init(void *handle) return 0; } -static int gmc_v7_0_sw_fini(void *handle) +static int gmc_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -1074,10 +1073,10 @@ static int gmc_v7_0_sw_fini(void *handle) return 0; } -static int gmc_v7_0_hw_init(void *handle) +static int gmc_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v7_0_init_golden_registers(adev); @@ -1101,9 +1100,9 @@ static int gmc_v7_0_hw_init(void *handle) return 0; } -static int gmc_v7_0_hw_fini(void *handle) +static int gmc_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v7_0_gart_disable(adev); @@ -1111,25 +1110,22 @@ static int gmc_v7_0_hw_fini(void *handle) return 0; } -static int gmc_v7_0_suspend(void *handle) +static int gmc_v7_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v7_0_hw_fini(adev); + gmc_v7_0_hw_fini(ip_block); return 0; } -static int gmc_v7_0_resume(void *handle) +static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v7_0_hw_init(adev); + r = gmc_v7_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -1146,11 +1142,11 @@ static bool gmc_v7_0_is_idle(void *handle) return true; } -static int gmc_v7_0_wait_for_idle(void *handle) +static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -1167,9 +1163,9 @@ static int gmc_v7_0_wait_for_idle(void *handle) } -static int gmc_v7_0_soft_reset(void *handle) +static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 86488c052f822..f5c1ebce73f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -53,7 +53,7 @@ static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); -static int gmc_v8_0_wait_for_idle(void *handle); +static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block); MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); @@ -170,8 +170,13 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) static void gmc_v8_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; + struct amdgpu_ip_block *ip_block; - gmc_v8_0_wait_for_idle(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + + 
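/*
 * Helpers such as mc_stop() are not IP-block callbacks themselves, so once
 * wait_for_idle() takes a struct amdgpu_ip_block * they must resolve their
 * own block first, as done just above. The lookup idiom:
 *
 *	struct amdgpu_ip_block *ip_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 *	if (!ip_block)
 *		return;	// GMC block not registered; nothing to wait on
 */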
gmc_v8_0_wait_for_idle(ip_block); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) { @@ -411,7 +416,6 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -426,6 +430,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; u32 tmp; int i, j; @@ -439,7 +444,11 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v8_0_wait_for_idle((void *)adev)) + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC); + if (!ip_block) + return; + + if (gmc_v8_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); if (adev->mode_info.num_crtc) { @@ -474,7 +483,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); - if (gmc_v8_0_wait_for_idle((void *)adev)) + if (gmc_v8_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -544,6 +553,11 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) break; } adev->gmc.vram_width = numchan * chansize; + /* FIXME: The above calculation is outdated. + * For HBM provide a temporary fix + */ + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM) + adev->gmc.vram_width = AMDGPU_VRAM_TYPE_HBM_WIDTH; } /* size in MB on si */ tmp = RREG32(mmCONFIG_MEMSIZE); @@ -1027,9 +1041,9 @@ static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type) } } -static int gmc_v8_0_early_init(void *handle) +static int gmc_v8_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v8_0_set_gmc_funcs(adev); gmc_v8_0_set_irq_funcs(adev); @@ -1046,9 +1060,9 @@ static int gmc_v8_0_early_init(void *handle) return 0; } -static int gmc_v8_0_late_init(void *handle) +static int gmc_v8_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); @@ -1076,10 +1090,10 @@ static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) #define mmMC_SEQ_MISC0_FIJI 0xA71 -static int gmc_v8_0_sw_init(void *handle) +static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); @@ -1173,9 +1187,9 @@ static int gmc_v8_0_sw_init(void *handle) return 0; } -static int gmc_v8_0_sw_fini(void *handle) +static int gmc_v8_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); @@ -1187,10 +1201,10 @@ static int gmc_v8_0_sw_fini(void *handle) return 0; } -static int gmc_v8_0_hw_init(void *handle) +static int 
gmc_v8_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v8_0_init_golden_registers(adev); @@ -1222,9 +1236,9 @@ static int gmc_v8_0_hw_init(void *handle) return 0; } -static int gmc_v8_0_hw_fini(void *handle) +static int gmc_v8_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); gmc_v8_0_gart_disable(adev); @@ -1232,25 +1246,22 @@ static int gmc_v8_0_hw_fini(void *handle) return 0; } -static int gmc_v8_0_suspend(void *handle) +static int gmc_v8_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - gmc_v8_0_hw_fini(adev); + gmc_v8_0_hw_fini(ip_block); return 0; } -static int gmc_v8_0_resume(void *handle) +static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v8_0_hw_init(adev); + r = gmc_v8_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -1267,11 +1278,11 @@ static bool gmc_v8_0_is_idle(void *handle) return true; } -static int gmc_v8_0_wait_for_idle(void *handle) +static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned int i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -1289,10 +1300,10 @@ static int gmc_v8_0_wait_for_idle(void *handle) } -static bool gmc_v8_0_check_soft_reset(void *handle) +static bool gmc_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__VMC_BUSY_MASK) @@ -1316,23 +1327,23 @@ static bool gmc_v8_0_check_soft_reset(void *handle) return false; } -static int gmc_v8_0_pre_soft_reset(void *handle) +static int gmc_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->gmc.srbm_soft_reset) return 0; gmc_v8_0_mc_stop(adev); - if (gmc_v8_0_wait_for_idle(adev)) + if (gmc_v8_0_wait_for_idle(ip_block)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); return 0; } -static int gmc_v8_0_soft_reset(void *handle) +static int gmc_v8_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset; if (!adev->gmc.srbm_soft_reset) @@ -1361,9 +1372,9 @@ static int gmc_v8_0_soft_reset(void *handle) return 0; } -static int gmc_v8_0_post_soft_reset(void *handle) +static int gmc_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->gmc.srbm_soft_reset) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc0..d32bb02253d8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -78,7 +78,9 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define 
mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV #define MAX_MEM_RANGES 8 +#endif static const char * const gfxhub_client_ids[] = { "CB", @@ -595,7 +597,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault); + addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +620,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) + addr, entry->timestamp, write_fault)) return 1; } } @@ -1386,15 +1388,45 @@ gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) return mode; } +static enum amdgpu_memory_partition +gmc_v9_0_query_vf_memory_partition(struct amdgpu_device *adev) +{ + switch (adev->gmc.num_mem_partitions) { + case 0: + return UNKNOWN_MEMORY_PARTITION_MODE; + case 1: + return AMDGPU_NPS1_PARTITION_MODE; + case 2: + return AMDGPU_NPS2_PARTITION_MODE; + case 4: + return AMDGPU_NPS4_PARTITION_MODE; + default: + return AMDGPU_NPS1_PARTITION_MODE; + } + + return AMDGPU_NPS1_PARTITION_MODE; +} + static enum amdgpu_memory_partition gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) - return AMDGPU_NPS1_PARTITION_MODE; + return gmc_v9_0_query_vf_memory_partition(adev); return gmc_v9_0_get_memory_partition(adev, NULL); } +static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev) +{ + if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested && + adev->nbio.funcs->is_nps_switch_requested(adev)) { + adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS; + return true; + } + + return false; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, @@ -1406,6 +1438,8 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, + .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition, + .need_reset_on_init = &gmc_v9_0_need_reset_on_init, }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -1545,9 +1579,31 @@ static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev) adev->gmc.xgmi.ras = &xgmi_ras; } -static int gmc_v9_0_early_init(void *handle) +static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gmc.supported_nps_modes = 0; + + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return; + + /*TODO: Check PSP version also which supports NPS switch. Otherwise keep + * supported modes as 0. 
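+ * (A mask of 0 means no NPS switch mode is treated as supported.)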
+ */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + adev->gmc.supported_nps_modes = + BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + default: + break; + } +} + +static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; /* * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined @@ -1601,9 +1657,9 @@ static int gmc_v9_0_early_init(void *handle) return 0; } -static int gmc_v9_0_late_init(void *handle) +static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_gmc_allocate_vm_inv_eng(adev); @@ -1640,8 +1696,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); - amdgpu_gmc_set_agp_default(adev, mc); - /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { @@ -1806,6 +1860,7 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) { enum amdgpu_memory_partition mode; @@ -1900,6 +1955,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, switch (mode) { case UNKNOWN_MEMORY_PARTITION_MODE: + adev->gmc.num_mem_partitions = 0; + break; case AMDGPU_NPS1_PARTITION_MODE: adev->gmc.num_mem_partitions = 1; break; @@ -1919,7 +1976,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, /* Use NPS range info, if populated */ r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, - adev->gmc.num_mem_partitions); + &adev->gmc.num_mem_partitions); if (!r) { l = 0; for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { @@ -1929,6 +1986,11 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, } } else { + if (!adev->gmc.num_mem_partitions) { + dev_err(adev->dev, + "Not able to detect NPS mode, fall back to NPS1"); + adev->gmc.num_mem_partitions = 1; + } /* Fallback to sw based calculation */ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; size /= adev->gmc.num_mem_partitions; @@ -1980,6 +2042,7 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) return 0; } +#endif static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { @@ -1987,10 +2050,10 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) adev->gmc.vram_width = 128 * 64; } -static int gmc_v9_0_sw_init(void *handle) +static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; unsigned long inst_mask = adev->aid_mask; adev->gfxhub.funcs->init(adev); @@ -2149,12 +2212,14 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; } +#endif /* Memory manager */ r = amdgpu_bo_init(adev); @@ -2165,6 +2230,7 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + gmc_v9_0_init_nps_details(adev); /* * 
number of VMs * VMID 0 is reserved for System @@ -2198,9 +2264,9 @@ static int gmc_v9_0_sw_init(void *handle) return 0; } -static int gmc_v9_0_sw_fini(void *handle) +static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) @@ -2218,8 +2284,10 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); +#ifdef HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV adev->gmc.num_mem_partitions = 0; kfree(adev->gmc.mem_partitions); +#endif return 0; } @@ -2308,9 +2376,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return 0; } -static int gmc_v9_0_hw_init(void *handle) +static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool value; int i, r; @@ -2393,9 +2461,9 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) adev->mmhub.funcs->gart_disable(adev); } -static int gmc_v9_0_hw_fini(void *handle) +static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; gmc_v9_0_gart_disable(adev); @@ -2413,32 +2481,44 @@ static int gmc_v9_0_hw_fini(void *handle) if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, false); - amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + /* + * For minimal init, late_init is not called, hence VM fault/RAS irqs + * are not enabled. + */ + if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); - if (adev->gmc.ecc_irq.funcs && - amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + } return 0; } -static int gmc_v9_0_suspend(void *handle) +static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return gmc_v9_0_hw_fini(adev); + return gmc_v9_0_hw_fini(ip_block); } -static int gmc_v9_0_resume(void *handle) +static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v9_0_hw_init(adev); + /* If a reset is done for NPS mode switch, read the memory range + * information again. 
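+ * The flag is cleared once the ranges are refreshed, so ordinary
+ * resumes skip the re-read.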
+ */ + if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) { + gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS; + } + + r = gmc_v9_0_hw_init(ip_block); if (r) return r; - amdgpu_vmid_reset_all(adev); + amdgpu_vmid_reset_all(ip_block->adev); return 0; } @@ -2449,13 +2529,13 @@ static bool gmc_v9_0_is_idle(void *handle) return true; } -static int gmc_v9_0_wait_for_idle(void *handle) +static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* There is no need to wait for MC idle in GMC v9.*/ return 0; } -static int gmc_v9_0_soft_reset(void *handle) +static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* XXX for emulation.*/ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 077c6d920e27f..e019249883fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index a9ea23fa0def7..ed7facacf2fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index ab06c2b4b20b2..33736d361dd0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 8d7d0813e3315..63820329f67eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -31,10 +31,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else + if (!ring || !ring->funcs->emit_wreg) { + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + } } static void hdp_v7_0_update_clock_gating(struct 
amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 07984f7c3ae77..a3fb01f905d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -273,9 +273,9 @@ static void iceland_ih_set_rptr(struct amdgpu_device *adev, WREG32(mmIH_RB_RPTR, ih->rptr); } -static int iceland_ih_early_init(void *handle) +static int iceland_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = amdgpu_irq_add_domain(adev); @@ -287,10 +287,10 @@ static int iceland_ih_early_init(void *handle) return 0; } -static int iceland_ih_sw_init(void *handle) +static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) @@ -301,9 +301,9 @@ static int iceland_ih_sw_init(void *handle) return r; } -static int iceland_ih_sw_fini(void *handle) +static int iceland_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); @@ -311,34 +311,28 @@ static int iceland_ih_sw_fini(void *handle) return 0; } -static int iceland_ih_hw_init(void *handle) +static int iceland_ih_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return iceland_ih_irq_init(adev); } -static int iceland_ih_hw_fini(void *handle) +static int iceland_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - iceland_ih_irq_disable(adev); + iceland_ih_irq_disable(ip_block->adev); return 0; } -static int iceland_ih_suspend(void *handle) +static int iceland_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return iceland_ih_hw_fini(adev); + return iceland_ih_hw_fini(ip_block); } -static int iceland_ih_resume(void *handle) +static int iceland_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return iceland_ih_hw_init(adev); + return iceland_ih_hw_init(ip_block); } static bool iceland_ih_is_idle(void *handle) @@ -352,11 +346,11 @@ static bool iceland_ih_is_idle(void *handle) return true; } -static int iceland_ih_wait_for_idle(void *handle) +static int iceland_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -368,10 +362,10 @@ static int iceland_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int iceland_ih_soft_reset(void *handle) +static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 18a761d6ef330..09403eac483bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c 
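The same mechanical conversion runs through every IH block in this series: each amd_ip_funcs callback that used to cast an opaque void *handle now takes the struct amdgpu_ip_block directly and reaches the device through it. A minimal sketch of the new callback shape, where foo and foo_irq_disable are placeholders rather than any engine in these hunks:

static int foo_hw_fini(struct amdgpu_ip_block *ip_block)
{
	/* no opaque cast: the device hangs off the IP block itself */
	struct amdgpu_device *adev = ip_block->adev;

	foo_irq_disable(adev);

	return 0;
}

static int foo_suspend(struct amdgpu_ip_block *ip_block)
{
	/* pass-through callbacks forward ip_block instead of adev */
	return foo_hw_fini(ip_block);
}
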
@@ -559,19 +559,19 @@ static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs; } -static int ih_v6_0_early_init(void *handle) +static int ih_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_0_set_interrupt_funcs(adev); ih_v6_0_set_self_irq_funcs(adev); return 0; } -static int ih_v6_0_sw_init(void *handle) +static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr; r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, @@ -614,19 +614,19 @@ static int ih_v6_0_sw_init(void *handle) return r; } -static int ih_v6_0_sw_fini(void *handle) +static int ih_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int ih_v6_0_hw_init(void *handle) +static int ih_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = ih_v6_0_irq_init(adev); if (r) @@ -635,27 +635,21 @@ static int ih_v6_0_hw_init(void *handle) return 0; } -static int ih_v6_0_hw_fini(void *handle) +static int ih_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - ih_v6_0_irq_disable(adev); + ih_v6_0_irq_disable(ip_block->adev); return 0; } -static int ih_v6_0_suspend(void *handle) +static int ih_v6_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v6_0_hw_fini(adev); + return ih_v6_0_hw_fini(ip_block); } -static int ih_v6_0_resume(void *handle) +static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v6_0_hw_init(adev); + return ih_v6_0_hw_init(ip_block); } static bool ih_v6_0_is_idle(void *handle) @@ -664,13 +658,13 @@ static bool ih_v6_0_is_idle(void *handle) return true; } -static int ih_v6_0_wait_for_idle(void *handle) +static int ih_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int ih_v6_0_soft_reset(void *handle) +static int ih_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 2e0469feca1e9..9706d7593d267 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -532,9 +532,9 @@ static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs; } -static int ih_v6_1_early_init(void *handle) +static int ih_v6_1_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = amdgpu_irq_add_domain(adev); @@ -547,10 +547,10 @@ static int ih_v6_1_early_init(void *handle) return 0; } -static int ih_v6_1_sw_init(void *handle) +static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr; r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, @@ -593,19 +593,19 
@@ static int ih_v6_1_sw_init(void *handle) return r; } -static int ih_v6_1_sw_fini(void *handle) +static int ih_v6_1_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int ih_v6_1_hw_init(void *handle) +static int ih_v6_1_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = ih_v6_1_irq_init(adev); if (r) @@ -614,27 +614,21 @@ static int ih_v6_1_hw_init(void *handle) return 0; } -static int ih_v6_1_hw_fini(void *handle) +static int ih_v6_1_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - ih_v6_1_irq_disable(adev); + ih_v6_1_irq_disable(ip_block->adev); return 0; } -static int ih_v6_1_suspend(void *handle) +static int ih_v6_1_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v6_1_hw_fini(adev); + return ih_v6_1_hw_fini(ip_block); } -static int ih_v6_1_resume(void *handle) +static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v6_1_hw_init(adev); + return ih_v6_1_hw_init(ip_block); } static bool ih_v6_1_is_idle(void *handle) @@ -643,13 +637,13 @@ static bool ih_v6_1_is_idle(void *handle) return true; } -static int ih_v6_1_wait_for_idle(void *handle) +static int ih_v6_1_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int ih_v6_1_soft_reset(void *handle) +static int ih_v6_1_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 6852081fcff21..9657145d7ccea 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -528,19 +528,19 @@ static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs; } -static int ih_v7_0_early_init(void *handle) +static int ih_v7_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v7_0_set_interrupt_funcs(adev); ih_v7_0_set_self_irq_funcs(adev); return 0; } -static int ih_v7_0_sw_init(void *handle) +static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr; r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, @@ -583,19 +583,19 @@ static int ih_v7_0_sw_init(void *handle) return r; } -static int ih_v7_0_sw_fini(void *handle) +static int ih_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int ih_v7_0_hw_init(void *handle) +static int ih_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = ih_v7_0_irq_init(adev); if (r) @@ -604,27 +604,21 @@ static int ih_v7_0_hw_init(void *handle) return 0; } -static int ih_v7_0_hw_fini(void *handle) +static int ih_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; - - ih_v7_0_irq_disable(adev); + ih_v7_0_irq_disable(ip_block->adev); return 0; } -static int ih_v7_0_suspend(void *handle) +static int ih_v7_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v7_0_hw_fini(adev); + return ih_v7_0_hw_fini(ip_block); } -static int ih_v7_0_resume(void *handle) +static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return ih_v7_0_hw_init(adev); + return ih_v7_0_hw_init(ip_block); } static bool ih_v7_0_is_idle(void *handle) @@ -633,13 +627,13 @@ static bool ih_v7_0_is_idle(void *handle) return true; } -static int ih_v7_0_wait_for_idle(void *handle) +static int ih_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int ih_v7_0_soft_reset(void *handle) +static int ih_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 6c1891889c4da..d4f72e47ae9e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -153,7 +153,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); } - //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB + //disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); imu_reg_val |= 0x10007; WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 71f43a5c7f721..33da094f1a7dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v1_0.h" @@ -34,6 +35,9 @@ static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring); +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) { @@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4))); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); @@ -455,9 +462,9 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, * * Set ring and irq function pointers */ -int jpeg_v1_0_early_init(void *handle) +int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; @@ -474,9 +481,9 @@ int jpeg_v1_0_early_init(void *handle) * @handle: amdgpu_device pointer * */ -int jpeg_v1_0_sw_init(void *handle) +int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + 
struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r; @@ -506,9 +513,9 @@ int jpeg_v1_0_sw_init(void *handle) * * JPEG free up sw allocation */ -void jpeg_v1_0_sw_fini(void *handle) +void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_ring_fini(adev->jpeg.inst->ring_dec); } @@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .parse_cs = jpeg_v1_dec_ring_parse_cs, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + @@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); } + +/** + * jpeg_v1_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + int ret = 0; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */ + return -EINVAL; + + if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END) + continue; + + switch (type) { + case PACKETJ_TYPE0: + if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW && + reg != JPEG_V1_REG_CTX_INDEX && + reg != JPEG_V1_REG_CTX_DATA) { + ret = -EINVAL; + } + break; + case PACKETJ_TYPE1: + if (reg != JPEG_V1_REG_CTX_DATA) + ret = -EINVAL; + break; + case PACKETJ_TYPE3: + if (reg != JPEG_V1_REG_SOFT_RESET) + ret = -EINVAL; + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] != CP_PACKETJ_NOP) + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + if (ret) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + break; + } + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h index bbf33a6a39729..0973286350835 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -24,9 +24,20 @@ #ifndef __JPEG_V1_0_H__ #define __JPEG_V1_0_H__ -int jpeg_v1_0_early_init(void *handle); -int jpeg_v1_0_sw_init(void *handle); -void jpeg_v1_0_sw_fini(void *handle); +int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block); +int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block); +void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block); void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); +#define JPEG_V1_REG_RANGE_START 0x8000 +#define JPEG_V1_REG_RANGE_END 0x803f + +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238 +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239 +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b +#define JPEG_V1_REG_CTX_INDEX 0x8328 +#define JPEG_V1_REG_CTX_DATA 0x8329 +#define 
JPEG_V1_REG_SOFT_RESET 0x83a0 + #endif /*__JPEG_V1_0_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 98aa3ccd0d202..ec3d341fef61b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -44,9 +45,9 @@ static int jpeg_v2_0_set_powergating_state(void *handle, * * Set ring and irq function pointers */ -static int jpeg_v2_0_early_init(void *handle) +static int jpeg_v2_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; @@ -64,9 +65,9 @@ static int jpeg_v2_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v2_0_sw_init(void *handle) +static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r; @@ -107,10 +108,10 @@ static int jpeg_v2_0_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v2_0_sw_fini(void *handle) +static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_jpeg_suspend(adev); if (r) @@ -127,9 +128,9 @@ static int jpeg_v2_0_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v2_0_hw_init(void *handle) +static int jpeg_v2_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -145,9 +146,9 @@ static int jpeg_v2_0_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v2_0_hw_fini(void *handle) +static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -165,16 +166,15 @@ static int jpeg_v2_0_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v2_0_suspend(void *handle) +static int jpeg_v2_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v2_0_hw_fini(adev); + r = jpeg_v2_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -186,16 +186,15 @@ static int jpeg_v2_0_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v2_0_resume(void *handle) +static int jpeg_v2_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v2_0_hw_init(adev); + r = jpeg_v2_0_hw_init(ip_block); return r; } @@ -538,7 +537,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + 
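/* a parsed stream must execute as VMID 0 */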
if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -661,9 +664,9 @@ static bool jpeg_v2_0_is_idle(void *handle) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v2_0_wait_for_idle(void *handle) +static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, @@ -764,6 +767,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -810,3 +814,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; + +/** + * jpeg_v2_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 654e43e83e2c4..63fadda7a6733 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,6 +45,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); extern const struct 
amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index d8ef95c847c2a..37cf415b6b410 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -54,9 +54,9 @@ static int amdgpu_ih_clientid_jpeg[] = { * * Set ring and irq function pointers */ -static int jpeg_v2_5_early_init(void *handle) +static int jpeg_v2_5_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 harvest; int i; @@ -85,11 +85,11 @@ static int jpeg_v2_5_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v2_5_sw_init(void *handle) +static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) @@ -157,10 +157,10 @@ static int jpeg_v2_5_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v2_5_sw_fini(void *handle) +static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_jpeg_suspend(adev); if (r) @@ -177,9 +177,9 @@ static int jpeg_v2_5_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v2_5_hw_init(void *handle) +static int jpeg_v2_5_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; @@ -206,9 +206,9 @@ static int jpeg_v2_5_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v2_5_hw_fini(void *handle) +static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -235,16 +235,15 @@ static int jpeg_v2_5_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v2_5_suspend(void *handle) +static int jpeg_v2_5_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v2_5_hw_fini(adev); + r = jpeg_v2_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -256,16 +255,15 @@ static int jpeg_v2_5_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v2_5_resume(void *handle) +static int jpeg_v2_5_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v2_5_hw_init(adev); + r = jpeg_v2_5_hw_init(ip_block); return r; } @@ -501,9 +499,9 @@ static bool jpeg_v2_5_is_idle(void *handle) return ret; } -static int jpeg_v2_5_wait_for_idle(void *handle) +static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -662,6 +660,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -691,6 +690,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 31cfa3ce6528d..2fa866a3e39db 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -46,9 +46,9 @@ static int jpeg_v3_0_set_powergating_state(void *handle, * * Set ring and irq function pointers */ -static int jpeg_v3_0_early_init(void *handle) +static int jpeg_v3_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 harvest; @@ -79,9 +79,9 @@ static int jpeg_v3_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v3_0_sw_init(void *handle) +static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r; @@ -122,9 +122,9 @@ static int jpeg_v3_0_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v3_0_sw_fini(void *handle) +static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_jpeg_suspend(adev); @@ -142,9 +142,9 @@ static int jpeg_v3_0_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v3_0_hw_init(void *handle) +static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -160,9 +160,9 @@ static int jpeg_v3_0_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v3_0_hw_fini(void *handle) +static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -180,16 +180,15 @@ static int jpeg_v3_0_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v3_0_suspend(void *handle) +static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v3_0_hw_fini(adev); + r = jpeg_v3_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -201,16 +200,15 @@ static int jpeg_v3_0_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v3_0_resume(void *handle) +static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = 
amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v3_0_hw_init(adev); + r = jpeg_v3_0_hw_init(ip_block); return r; } @@ -459,9 +457,9 @@ static bool jpeg_v3_0_is_idle(void *handle) return ret; } -static int jpeg_v3_0_wait_for_idle(void *handle) +static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, @@ -560,6 +558,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 3dac8f259d7fb..3cef4124b171b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -52,9 +52,9 @@ static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring); * * Set ring and irq function pointers */ -static int jpeg_v4_0_early_init(void *handle) +static int jpeg_v4_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->jpeg.num_jpeg_inst = 1; @@ -74,9 +74,9 @@ static int jpeg_v4_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v4_0_sw_init(void *handle) +static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r; @@ -134,9 +134,9 @@ static int jpeg_v4_0_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v4_0_sw_fini(void *handle) +static int jpeg_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_jpeg_suspend(adev); @@ -154,9 +154,9 @@ static int jpeg_v4_0_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v4_0_hw_init(void *handle) +static int jpeg_v4_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; @@ -191,9 +191,9 @@ static int jpeg_v4_0_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v4_0_hw_fini(void *handle) +static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); if (!amdgpu_sriov_vf(adev)) { @@ -214,16 +214,15 @@ static int jpeg_v4_0_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v4_0_suspend(void *handle) +static int jpeg_v4_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v4_0_hw_fini(adev); + r = jpeg_v4_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -235,16 +234,15 @@ static int jpeg_v4_0_suspend(void *handle) * * Resume firmware 
and hw init JPEG block */ -static int jpeg_v4_0_resume(void *handle) +static int jpeg_v4_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v4_0_hw_init(adev); + r = jpeg_v4_0_hw_init(ip_block); return r; } @@ -621,9 +619,9 @@ static bool jpeg_v4_0_is_idle(void *handle) return ret; } -static int jpeg_v4_0_wait_for_idle(void *handle) +static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, @@ -727,6 +725,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index 07d36c2abd6bb..47638fd4d4e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block { }; extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block; - #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 6ae5a784e1874..fd108f992ab15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,9 +23,9 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" -#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "mmsch_v4_0_3.h" @@ -59,6 +59,12 @@ static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG7_DECODE }; +static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev) +{ + return amdgpu_sriov_vf(adev) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)); +} + /** * jpeg_v4_0_3_early_init - set function pointers * @@ -66,9 +72,9 @@ static int amdgpu_ih_srcid_jpeg[] = { * * Set ring and irq function pointers */ -static int jpeg_v4_0_3_early_init(void *handle) +static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; @@ -86,9 +92,9 @@ static int jpeg_v4_0_3_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v4_0_3_sw_init(void *handle) +static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, j, r, jpeg_inst; @@ -163,9 +169,9 @@ static int jpeg_v4_0_3_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v4_0_3_sw_fini(void *handle) +static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_jpeg_suspend(adev); @@ -296,9 +302,9 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) * @handle: amdgpu_device pointer * */ -static int jpeg_v4_0_3_hw_init(void 
*handle) +static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, j, r, jpeg_inst; @@ -356,17 +362,15 @@ static int jpeg_v4_0_3_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v4_0_3_hw_fini(void *handle) +static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; cancel_delayed_work_sync(&adev->jpeg.idle_work); - if (!amdgpu_sriov_vf(adev)) { - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); - } + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); return ret; } @@ -378,16 +382,15 @@ static int jpeg_v4_0_3_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v4_0_3_suspend(void *handle) +static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v4_0_3_hw_fini(adev); + r = jpeg_v4_0_3_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -399,16 +402,15 @@ static int jpeg_v4_0_3_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v4_0_3_resume(void *handle) +static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v4_0_3_hw_init(adev); + r = jpeg_v4_0_3_hw_init(ip_block); return r; } @@ -668,11 +670,12 @@ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ - } - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x80004000); + amdgpu_ring_write(ring, + PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, + 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80004000); + } } /** @@ -688,11 +691,12 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x62a04); - } - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x00004000); + amdgpu_ring_write(ring, + PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, + 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00004000); + } } /** @@ -734,31 +738,11 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); amdgpu_ring_write(ring, 0); - if (ring->adev->jpeg.inst[ring->me].aid_id) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x4); - } else { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } - - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x3fbc); - - if 
(ring->adev->jpeg.inst[ring->me].aid_id) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, - 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x0); - } else { - amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); - amdgpu_ring_write(ring, 0); - } + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x1); + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); amdgpu_ring_write(ring, 0); @@ -834,8 +818,8 @@ void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, { uint32_t reg_offset; - /* For VF, only local offsets should be used */ - if (amdgpu_sriov_vf(ring->adev)) + /* Use normalized offsets if required */ + if (jpeg_v4_0_3_normalizn_reqd(ring->adev)) reg = NORMALIZE_JPEG_REG_OFFSET(reg); reg_offset = (reg << 2); @@ -881,8 +865,8 @@ void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint { uint32_t reg_offset; - /* For VF, only local offsets should be used */ - if (amdgpu_sriov_vf(ring->adev)) + /* Use normalized offsets if required */ + if (jpeg_v4_0_3_normalizn_reqd(ring->adev)) reg = NORMALIZE_JPEG_REG_OFFSET(reg); reg_offset = (reg << 2); @@ -935,9 +919,9 @@ static bool jpeg_v4_0_3_is_idle(void *handle) return ret; } -static int jpeg_v4_0_3_wait_for_idle(void *handle) +static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; int i, j; @@ -1089,12 +1073,12 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ - 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ + 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ 8 + 16, .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, @@ -1254,56 +1238,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } - -/** - * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser - * - * @parser: Command submission parser context - * @job: the job to parse - * @ib: the IB to parse - * - * Parse the command stream, return -EINVAL for invalid packet, - * 0 otherwise - */ -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - uint32_t i, reg, res, cond, type; - struct amdgpu_device *adev = parser->adev; - - for (i = 0; i < ib->length_dw ; i += 2) { - reg = CP_PACKETJ_GET_REG(ib->ptr[i]); - res = CP_PACKETJ_GET_RES(ib->ptr[i]); - cond = CP_PACKETJ_GET_COND(ib->ptr[i]); - type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); - - if (res) /* only support 0 at the moment */ - return -EINVAL; - - switch (type) { - case PACKETJ_TYPE0: - if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case 
PACKETJ_TYPE3: - if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE6: - if (ib->ptr[i] == CP_PACKETJ_NOP) - continue; - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - default: - dev_err(adev->dev, "Unknown packet type %d !\n", type); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 71c54b294e157..747a3e5f68564 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,9 +46,6 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -#define JPEG_REG_RANGE_START 0x4000 -#define JPEG_REG_RANGE_END 0x41c2 - extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index f96ac6bce526d..2f9749b00eaf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -65,9 +65,9 @@ static int amdgpu_ih_clientid_jpeg[] = { * * Set ring and irq function pointers */ -static int jpeg_v4_0_5_early_init(void *handle) +static int jpeg_v4_0_5_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(4, 0, 5): @@ -98,9 +98,9 @@ static int jpeg_v4_0_5_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v4_0_5_sw_init(void *handle) +static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r, i; @@ -163,9 +163,9 @@ static int jpeg_v4_0_5_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v4_0_5_sw_fini(void *handle) +static int jpeg_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_jpeg_suspend(adev); @@ -183,9 +183,9 @@ static int jpeg_v4_0_5_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v4_0_5_hw_init(void *handle) +static int jpeg_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r = 0; @@ -214,9 +214,9 @@ static int jpeg_v4_0_5_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v4_0_5_hw_fini(void *handle) +static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; 
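/*
 * A minimal sketch, assuming only the amdgpu IP-block API visible in this
 * series, of the callback shape every handler in these files is migrated to:
 * the opaque void *handle that used to be cast to struct amdgpu_device is
 * replaced by the struct amdgpu_ip_block itself, and the device is read from
 * ->adev. The function and helper names here are hypothetical:
 *
 *	static int example_hw_fini(struct amdgpu_ip_block *ip_block)
 *	{
 *		struct amdgpu_device *adev = ip_block->adev;
 *
 *		return example_teardown(adev);
 *	}
 *
 * Handlers that only need adev once (the suspend/resume callbacks in these
 * hunks) skip the local variable and pass ip_block->adev straight through.
 */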
cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -241,16 +241,15 @@ static int jpeg_v4_0_5_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v4_0_5_suspend(void *handle) +static int jpeg_v4_0_5_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v4_0_5_hw_fini(adev); + r = jpeg_v4_0_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -262,16 +261,15 @@ static int jpeg_v4_0_5_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v4_0_5_resume(void *handle) +static int jpeg_v4_0_5_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v4_0_5_hw_init(adev); + r = jpeg_v4_0_5_hw_init(ip_block); return r; } @@ -637,9 +635,9 @@ static bool jpeg_v4_0_5_is_idle(void *handle) return ret; } -static int jpeg_v4_0_5_wait_for_idle(void *handle) +static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -768,6 +766,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index f4daff90c7709..a9a0b3f250dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -26,6 +26,7 @@ #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "vcn/vcn_5_0_0_offset.h" @@ -45,9 +46,9 @@ static int jpeg_v5_0_0_set_powergating_state(void *handle, * * Set ring and irq function pointers */ -static int jpeg_v5_0_0_early_init(void *handle) +static int jpeg_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; @@ -65,9 +66,9 @@ static int jpeg_v5_0_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int jpeg_v5_0_0_sw_init(void *handle) +static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r; @@ -109,9 +110,9 @@ static int jpeg_v5_0_0_sw_init(void *handle) * * JPEG suspend and free up sw allocation */ -static int jpeg_v5_0_0_sw_fini(void *handle) +static int jpeg_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_jpeg_suspend(adev); @@ -129,9 +130,9 @@ static int jpeg_v5_0_0_sw_fini(void *handle) * @handle: amdgpu_device pointer * */ -static int jpeg_v5_0_0_hw_init(void *handle) +static int jpeg_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device 
*adev = ip_block->adev; struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; @@ -156,9 +157,9 @@ static int jpeg_v5_0_0_hw_init(void *handle) * * Stop the JPEG block, mark ring as not ready any more */ -static int jpeg_v5_0_0_hw_fini(void *handle) +static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -176,16 +177,15 @@ static int jpeg_v5_0_0_hw_fini(void *handle) * * HW fini and suspend JPEG block */ -static int jpeg_v5_0_0_suspend(void *handle) +static int jpeg_v5_0_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = jpeg_v5_0_0_hw_fini(adev); + r = jpeg_v5_0_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_jpeg_suspend(adev); + r = amdgpu_jpeg_suspend(ip_block->adev); return r; } @@ -197,16 +197,15 @@ static int jpeg_v5_0_0_suspend(void *handle) * * Resume firmware and hw init JPEG block */ -static int jpeg_v5_0_0_resume(void *handle) +static int jpeg_v5_0_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_jpeg_resume(adev); + r = amdgpu_jpeg_resume(ip_block->adev); if (r) return r; - r = jpeg_v5_0_0_hw_init(adev); + r = jpeg_v5_0_0_hw_init(ip_block); return r; } @@ -545,9 +544,9 @@ static bool jpeg_v5_0_0_is_idle(void *handle) return ret; } -static int jpeg_v5_0_0_wait_for_idle(void *handle) +static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK, @@ -646,7 +645,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c0340ee3dec04..4b0f83fe9dd98 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "soc15_common.h" #include "soc21.h" +#include "gfx_v11_0.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" #include "gc/gc_11_0_0_default.h" @@ -54,8 +55,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin"); -static int mes_v11_0_hw_init(void *handle); -static int mes_v11_0_hw_fini(void *handle); +static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block); +static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); @@ -160,7 +161,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; @@ -360,6 +361,125 @@ 
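/*
 * The hunk below adds a register-level (MMIO) reset path for MES v11 queues.
 * In outline: enter RLC safe mode; for GFX queues request a per-VMID reset
 * through regCP_VMID_RESET and poll regCP_GFX_HQD_ACTIVE, for compute queues
 * write regCP_HQD_DEQUEUE_REQUEST and regSPI_COMPUTE_QUEUE_RESET and poll
 * regCP_HQD_ACTIVE, for SDMA queues set the queue's bit in
 * regSDMAn_QUEUE_RESET_REQ and poll it clear; finally leave RLC safe mode.
 * The bounded-poll idiom it relies on, as a sketch (busy_mask is a stand-in
 * for the per-engine status bit):
 *
 *	for (i = 0; i < adev->usec_timeout; i++) {
 *		if (!(RREG32(reg) & busy_mask))
 *			break;
 *		udelay(1);
 *	}
 *	if (i >= adev->usec_timeout)
 *		r = -ETIMEDOUT;
 */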
static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value, reg; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v11_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue takes effect */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue takes effect */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + switch (me_id) { + case 1: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ); + break; + case 0: + default: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ); + break; + } + + value = 1 << queue_id; + WREG32(reg, value); + /* wait for queue reset done */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(reg) & value)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on sdma queue reset done\n"); + r = -ETIMEDOUT; + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + if (input->use_mmio) + return mes_v11_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + 
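/*
 * doorbell_offset and gang_context_addr below are the identifiers the MES
 * firmware uses to locate the queue being reset; the commented-out
 * reset_queue_only flag, left disabled here, would presumably restrict the
 * reset to the single queue rather than its whole gang.
 */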
mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -421,13 +541,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) @@ -600,6 +748,11 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, { union MESAPI__RESET mes_reset_queue_pkt; + if (input->use_mmio) + return mes_v11_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -636,6 +789,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, .reset_legacy_queue = mes_v11_0_reset_legacy_queue, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -726,6 +880,28 @@ static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, (void **)&adev->mes.ucode_fw_ptr[pipe]); } +static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) +{ + int pipe; + + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + 
mutex_unlock(&adev->srbm_mutex); +} + static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) { uint64_t ucode_addr; @@ -1095,18 +1271,6 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); - - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - return 0; } @@ -1197,9 +1361,9 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, return 0; } -static int mes_v11_0_sw_init(void *handle) +static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe, r; adev->mes.funcs = &mes_v11_0_funcs; @@ -1238,9 +1402,9 @@ static int mes_v11_0_sw_init(void *handle) return 0; } -static int mes_v11_0_sw_fini(void *handle) +static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { @@ -1334,6 +1498,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; + struct amdgpu_ip_block *ip_block; if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1353,20 +1518,35 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) mes_v11_0_enable(adev, true); + mes_v11_0_get_fw_version(adev); + mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); + if (unlikely(!ip_block)) { + dev_err(adev->dev, "Failed to get MES handle\n"); + return -EINVAL; + } + r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) goto failure; - r = mes_v11_0_hw_init(adev); - if (r) - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47) + adev->mes.enable_legacy_queue_map = true; + else + adev->mes.enable_legacy_queue_map = false; + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v11_0_hw_init(ip_block); + if (r) + goto failure; + } return r; failure: - mes_v11_0_hw_fini(adev); + mes_v11_0_hw_fini(ip_block); return r; } @@ -1387,10 +1567,10 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) return 0; } -static int mes_v11_0_hw_init(void *handle) +static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->mes.ring[0].sched.ready) goto out; @@ -1442,13 +1622,13 @@ static int mes_v11_0_hw_init(void *handle) return 0; failure: - mes_v11_0_hw_fini(adev); + mes_v11_0_hw_fini(ip_block); return r; } -static int mes_v11_0_hw_fini(void *handle) +static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_is_mes_info_enable(adev)) { amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, &adev->mes.resource_1_addr); @@ -1456,33 +1636,31 @@ static int mes_v11_0_hw_fini(void *handle) return 0; } -static int mes_v11_0_suspend(void *handle) 
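/*
 * A sketch of the firmware gate added in kiq_hw_init above, assuming only
 * the AMDGPU_MES_VERSION_MASK macro this file already relies on: legacy
 * queue mapping is enabled only when the scheduler firmware version (read
 * from regCP_MES_GP3_LO into mes.sched_version) is at least 0x47. The
 * helper name is hypothetical:
 *
 *	static bool example_legacy_map_supported(struct amdgpu_device *adev)
 *	{
 *		return (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47;
 *	}
 */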
+static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_mes_suspend(adev); + r = amdgpu_mes_suspend(ip_block->adev); if (r) return r; - return mes_v11_0_hw_fini(adev); + return mes_v11_0_hw_fini(ip_block); } -static int mes_v11_0_resume(void *handle) +static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = mes_v11_0_hw_init(adev); + r = mes_v11_0_hw_init(ip_block); if (r) return r; - return amdgpu_mes_resume(adev); + return amdgpu_mes_resume(ip_block->adev); } -static int mes_v11_0_early_init(void *handle) +static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe, r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { @@ -1496,9 +1674,9 @@ static int mes_v11_0_early_init(void *handle) return 0; } -static int mes_v11_0_late_init(void *handle) +static int mes_v11_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* it's only intended for use in mes_self_test case, not for s0ix and reset */ if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 35cd6ad73912d..f50071cf95b96 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -39,8 +39,8 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin"); -static int mes_v12_0_hw_init(void *handle); -static int mes_v12_0_hw_fini(void *handle); +static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block); +static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block); static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); @@ -146,7 +146,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; - signed long timeout = 3000000; /* 3000 ms */ + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring[pipe]; spinlock_t *ring_lock = &mes->ring_lock[pipe]; @@ -350,6 +350,32 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v12_0_map_legacy_queue(struct 
amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -453,6 +479,11 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, union MESAPI__MISC misc_pkt; int pipe; + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + memset(&misc_pkt, 0, sizeof(misc_pkt)); misc_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -487,6 +518,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; break; case MES_MISC_OP_SET_SHADER_DEBUGGER: + pipe = AMDGPU_MES_SCHED_PIPE; misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; misc_pkt.set_shader_debugger.process_context_addr = input->set_shader_debugger.process_context_addr; @@ -504,11 +536,6 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - if (mes->adev->enable_uni_mes) - pipe = AMDGPU_MES_KIQ_PIPE; - else - pipe = AMDGPU_MES_SCHED_PIPE; - return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); @@ -582,6 +609,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell @@ -723,6 +751,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, .reset_legacy_queue = mes_v12_0_reset_legacy_queue, + .reset_hw_queue = mes_v12_0_reset_hw_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1297,14 +1326,15 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_sw_init(void *handle) +static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe, r; adev->mes.funcs = &mes_v12_0_funcs; adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.enable_legacy_queue_map = true; adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; @@ -1332,9 +1362,9 @@ static int mes_v12_0_sw_init(void *handle) return 0; } -static int mes_v12_0_sw_fini(void *handle) +static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { @@ -1422,6 +1452,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; + struct amdgpu_ip_block *ip_block; if (adev->enable_uni_mes) mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); @@ -1449,6 +1480,12 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) mes_v12_0_enable(adev, true); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); + if (unlikely(!ip_block)) { + dev_err(adev->dev, "Failed to get MES handle\n"); + return -EINVAL; + } + r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) goto failure; @@ -1461,14 +1498,16 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); } - r = mes_v12_0_hw_init(adev); - if (r) - goto failure; + if (adev->mes.enable_legacy_queue_map) { + r = 
mes_v12_0_hw_init(ip_block); + if (r) + goto failure; + } return r; failure: - mes_v12_0_hw_fini(adev); + mes_v12_0_hw_fini(ip_block); return r; } @@ -1490,10 +1529,10 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) return 0; } -static int mes_v12_0_hw_init(void *handle) +static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->mes.ring[0].sched.ready) goto out; @@ -1552,42 +1591,40 @@ static int mes_v12_0_hw_init(void *handle) return 0; failure: - mes_v12_0_hw_fini(adev); + mes_v12_0_hw_fini(ip_block); return r; } -static int mes_v12_0_hw_fini(void *handle) +static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int mes_v12_0_suspend(void *handle) +static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_mes_suspend(adev); + r = amdgpu_mes_suspend(ip_block->adev); if (r) return r; - return mes_v12_0_hw_fini(adev); + return mes_v12_0_hw_fini(ip_block); } -static int mes_v12_0_resume(void *handle) +static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = mes_v12_0_hw_init(adev); + r = mes_v12_0_hw_init(ip_block); if (r) return r; - return amdgpu_mes_resume(adev); + return amdgpu_mes_resume(ip_block->adev); } -static int mes_v12_0_early_init(void *handle) +static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe, r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { @@ -1599,9 +1636,9 @@ static int mes_v12_0_early_init(void *handle) return 0; } -static int mes_v12_0_late_init(void *handle) +static int mes_v12_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* it's only intended for use in mes_self_test case, not for s0ix and reset */ if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 915203b91c5fb..b01bb759d0f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,22 +559,6 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } -static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst) -{ - u32 fed, status; - - status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -584,7 +568,6 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, - .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index f47bd7ada4d79..4dcb72d1bdda2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -61,15 +61,18 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev) static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, enum idh_event event) { + int r = 0; u32 reg; reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); - if (reg != event) + if (reg == IDH_FAIL) + r = -EINVAL; + else if (reg != event) return -ENOENT; xgpu_nv_mailbox_send_ack(adev); - return 0; + return r; } static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev) @@ -178,6 +181,9 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, if (data1 != 0) event = IDH_RAS_POISON_READY; break; + case IDH_REQ_RAS_ERROR_COUNT: + event = IDH_RAS_ERROR_COUNT_READY; + break; default: break; } @@ -456,6 +462,11 @@ static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev) return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF); } +static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev) +{ + return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -466,4 +477,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .trans_msg = xgpu_nv_mailbox_trans_msg, .ras_poison_handler = xgpu_nv_ras_poison_handler, .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, + .req_ras_err_count = xgpu_nv_req_ras_err_count, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 1d099ffb3a5a2..9d61d76e1bf96 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -40,6 +40,7 @@ enum idh_request { IDH_LOG_VF_ERROR = 200, IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, + IDH_REQ_RAS_ERROR_COUNT = 203, }; enum idh_event { @@ -54,6 +55,8 @@ enum idh_event { IDH_RAS_POISON_READY, IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, + IDH_RAS_ERROR_COUNT_READY = 11, + IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index b281462093f11..93da900b7ee2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -542,19 +542,19 @@ static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs; } -static int navi10_ih_early_init(void *handle) +static int navi10_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; navi10_ih_set_interrupt_funcs(adev); navi10_ih_set_self_irq_funcs(adev); return 0; } -static int navi10_ih_sw_init(void *handle) +static int navi10_ih_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr; r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0, @@ -593,43 +593,37 @@ static int navi10_ih_sw_init(void *handle) return r; } -static int navi10_ih_sw_fini(void *handle) +static int navi10_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int navi10_ih_hw_init(void *handle) +static int navi10_ih_hw_init(struct amdgpu_ip_block 
*ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return navi10_ih_irq_init(adev); } -static int navi10_ih_hw_fini(void *handle) +static int navi10_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - navi10_ih_irq_disable(adev); + navi10_ih_irq_disable(ip_block->adev); return 0; } -static int navi10_ih_suspend(void *handle) +static int navi10_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return navi10_ih_hw_fini(adev); + return navi10_ih_hw_fini(ip_block); } -static int navi10_ih_resume(void *handle) +static int navi10_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return navi10_ih_hw_init(adev); + return navi10_ih_hw_init(ip_block); } static bool navi10_ih_is_idle(void *handle) @@ -638,13 +632,13 @@ static bool navi10_ih_is_idle(void *handle) return true; } -static int navi10_ih_wait_for_idle(void *handle) +static int navi10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int navi10_ih_soft_reset(void *handle) +static int navi10_ih_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h index a5b60c9a24189..c88284ff92d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h @@ -68,6 +68,7 @@ #define SDMA_SUBOP_POLL_REG_WRITE_MEM 1 #define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2 #define SDMA_SUBOP_POLL_MEM_VERIFY 3 +#define SDMA_SUBOP_VM_INVALIDATION 4 #define HEADER_AGENT_DISPATCH 4 #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 @@ -4040,6 +4041,69 @@ #define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift) +/* +** Definitions for SDMA_PKT_VM_INVALIDATION packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 +#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) + +/*define for gfx_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16 +#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift) + +/*define for mm_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24 +#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << 
SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift) + +/*define for INVALIDATEREQ word*/ +/*define for invalidatereq field*/ +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) + +/*define for ADDRESSRANGELO word*/ +/*define for addressrangelo field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) + +/*define for ADDRESSRANGEHI word*/ +/*define for invalidateack field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) + +/*define for addressrangehi field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) + +/*define for reserved field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) + + /* ** Definitions for SDMA_PKT_ATOMIC packet */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 39919e0892c14..f5b504979a331 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -331,12 +331,14 @@ static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2); +#ifdef HAVE_PCI_DEV_LTR_PATH if (adev->pdev->ltr_path == (devctl2 & PCI_EXP_DEVCTL2_LTR_EN)) return; if (adev->pdev->ltr_path) pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN); else +#endif pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN); } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index fa479dfa1ec15..b66141b5afeef 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -365,7 +365,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; } else { - /* Disbale ASPM L1 */ 
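/*
 * A sketch of the compile-time guard this series wraps around every
 * pdev->ltr_path access in the nbif/nbio files (HAVE_PCI_DEV_LTR_PATH is
 * assumed to be provided by the build system on kernels whose struct
 * pci_dev exposes the ltr_path field; nbio_vX_Y_program_ltr() stands in
 * for the per-IP LTR helper):
 *
 *	#ifdef HAVE_PCI_DEV_LTR_PATH
 *		if (adev->pdev->ltr_path)
 *			nbio_vX_Y_program_ltr(adev);
 *	#endif
 */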
+ /* Disable ASPM L1 */ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; /* Disable ASPM TxL0s */ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; @@ -461,9 +461,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) /* Don't bother about LTR if LTR is not enabled * in the path */ +#ifdef HAVE_PCI_DEV_LTR_PATH if (adev->pdev->ltr_path) nbio_v2_3_program_ltr(adev); - +#endif def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index a54052dea8bf5..8374df22a03d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -375,9 +375,11 @@ static void nbio_v4_3_program_ltr(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); +#ifdef HAVE_PCI_DEV_LTR_PATH if (adev->pdev->ltr_path) data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; else +#endif data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; if (def != data) WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 34180c6070dd2..08f428586624a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -362,9 +362,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) /* Don't bother about LTR if LTR is not enabled * in the path */ +#ifdef HAVE_PCI_DEV_LTR_PATH if (adev->pdev->ltr_path) nbio_v6_1_program_ltr(adev); - +#endif def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 8d80df94bd8b5..97782a73f4b02 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -414,8 +414,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device /* ras_controller_int is dedicated for nbif ras error, * not the global interrupt for sync flood */ - amdgpu_ras_set_fed(adev, true); - amdgpu_ras_reset_gpu(adev); + amdgpu_ras_global_ras_isr(adev); } amdgpu_ras_error_data_fini(&err_data); @@ -761,8 +760,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) /* Don't bother about LTR if LTR is not enabled * in the path */ +#ifdef HAVE_PCI_DEV_LTR_PATH if (adev->pdev->ltr_path) nbio_v7_4_program_ltr(adev); +#endif def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index d1bd79bbae532..8a0a63ac88d2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -401,6 +401,17 @@ static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev) return px; } +static bool nbio_v7_9_is_nps_switch_requested(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS); + tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, + CHANGE_STATUE); + + /* 0x8 - NPS switch requested */ + return (tmp == 0x8); +} static u32 nbio_v7_9_get_memory_partition_mode(struct 
amdgpu_device *adev, u32 *supp_modes) { @@ -508,6 +519,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .remap_hdp_registers = nbio_v7_9_remap_hdp_registers, .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, + .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested, .init_registers = nbio_v7_9_init_registers, .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, .set_reg_remap = nbio_v7_9_set_reg_remap, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 4938e6b340e9e..4e8f9af1e2bec 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -634,9 +634,9 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = { .query_video_codecs = &nv_query_video_codecs, }; -static int nv_common_early_init(void *handle) +static int nv_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; @@ -944,9 +944,9 @@ static int nv_common_early_init(void *handle) return 0; } -static int nv_common_late_init(void *handle) +static int nv_common_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); @@ -973,9 +973,9 @@ static int nv_common_late_init(void *handle) return 0; } -static int nv_common_sw_init(void *handle) +static int nv_common_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_add_irq_id(adev); @@ -983,14 +983,14 @@ static int nv_common_sw_init(void *handle) return 0; } -static int nv_common_sw_fini(void *handle) +static int nv_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int nv_common_hw_init(void *handle) +static int nv_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->nbio.funcs->apply_lc_spc_mode_wa) adev->nbio.funcs->apply_lc_spc_mode_wa(adev); @@ -1014,9 +1014,9 @@ static int nv_common_hw_init(void *handle) return 0; } -static int nv_common_hw_fini(void *handle) +static int nv_common_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* Disable the doorbell aperture and selfring doorbell aperture * separately in hw_fini because nv_enable_doorbell_aperture @@ -1029,18 +1029,14 @@ static int nv_common_hw_fini(void *handle) return 0; } -static int nv_common_suspend(void *handle) +static int nv_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return nv_common_hw_fini(adev); + return nv_common_hw_fini(ip_block); } -static int nv_common_resume(void *handle) +static int nv_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return nv_common_hw_init(adev); + return nv_common_hw_init(ip_block); } static bool nv_common_is_idle(void *handle) @@ -1048,12 +1044,12 @@ static bool nv_common_is_idle(void *handle) return true; } -static int nv_common_wait_for_idle(void *handle) +static int 
nv_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int nv_common_soft_reset(void *handle) +static int nv_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 37b5ddd6f13b3..f4a91b126c73c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -103,6 +103,10 @@ enum psp_gfx_cmd_id GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */ GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */ + /*IDs of performance monitoring/profiling*/ + GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */ + /* Dynamic memory partitioning (NPS mode change) */ + GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */ }; /* PSP boot config sub-commands */ @@ -351,6 +355,20 @@ struct psp_gfx_cmd_sriov_spatial_part { uint32_t override_this_aid; }; +/*Structure for sq performance monitoring/profiling enable/disable*/ +struct psp_gfx_cmd_config_sq_perfmon { + uint32_t gfx_xcp_mask; + uint8_t core_override; + uint8_t reg_override; + uint8_t perfmon_override; + uint8_t reserved[5]; +}; + +struct psp_gfx_cmd_fb_memory_part { + uint32_t mode; /* requested NPS mode */ + uint32_t resvd; +}; + /* All GFX ring buffer commands. */ union psp_gfx_commands { @@ -365,6 +383,8 @@ union psp_gfx_commands struct psp_gfx_cmd_load_toc cmd_load_toc; struct psp_gfx_cmd_boot_cfg boot_cfg; struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part; + struct psp_gfx_cmd_config_sq_perfmon config_sq_perfmon; + struct psp_gfx_cmd_fb_memory_part cmd_memory_part; }; struct psp_gfx_uresp_reserved diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 1251ee38a6764..c4b775aaee9fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin"); /* memory training timeout define */ #define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 +#define regMP1_PUB_SCRATCH0 0x3b10090 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -807,6 +809,44 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) } } +static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + u32 pmfw_ver; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return false; + + /* load 4e version of sos if pmfw version less than 85.115.0 */ + pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4); + + return (pmfw_ver < 0x557300); } + +static bool psp_v13_0_is_reload_needed(struct psp_context *psp) +{ + uint32_t ucode_ver; + + if (!psp_v13_0_is_sos_alive(psp)) + return false; + + /* Restrict reload support only to specific IP versions */ + switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 14): + /* TOS version read from microcode header */ + ucode_ver = psp->sos.fw_version; + /* Read TOS version from hardware */ + psp_v13_0_init_sos_version(psp); + return (ucode_ver != psp->sos.fw_version); + default: + return false; + } + + return false; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = 
psp_v13_0_wait_for_bootloader_steady_state, @@ -830,6 +870,8 @@ static const struct psp_funcs psp_v13_0_funcs = { .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, .get_ras_capability = psp_v13_0_get_ras_capability, + .is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required, + .is_reload_needed = psp_v13_0_is_reload_needed, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 725392522267f..10fd772cb80fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -807,9 +807,9 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static int sdma_v2_4_early_init(void *handle) +static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; adev->sdma.num_instances = SDMA_MAX_INSTANCE; @@ -826,11 +826,11 @@ static int sdma_v2_4_early_init(void *handle) return 0; } -static int sdma_v2_4_sw_init(void *handle) +static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, @@ -866,9 +866,9 @@ static int sdma_v2_4_sw_init(void *handle) return r; } -static int sdma_v2_4_sw_fini(void *handle) +static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -878,10 +878,10 @@ static int sdma_v2_4_sw_fini(void *handle) return 0; } -static int sdma_v2_4_hw_init(void *handle) +static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; sdma_v2_4_init_golden_registers(adev); @@ -892,27 +892,21 @@ static int sdma_v2_4_hw_init(void *handle) return r; } -static int sdma_v2_4_hw_fini(void *handle) +static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - sdma_v2_4_enable(adev, false); + sdma_v2_4_enable(ip_block->adev, false); return 0; } -static int sdma_v2_4_suspend(void *handle) +static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v2_4_hw_fini(adev); + return sdma_v2_4_hw_fini(ip_block); } -static int sdma_v2_4_resume(void *handle) +static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v2_4_hw_init(adev); + return sdma_v2_4_hw_init(ip_block); } static bool sdma_v2_4_is_idle(void *handle) @@ -927,11 +921,11 @@ static bool sdma_v2_4_is_idle(void *handle) return true; } -static int sdma_v2_4_wait_for_idle(void *handle) +static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -944,10 +938,10 @@ static 
int sdma_v2_4_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v2_4_soft_reset(void *handle) +static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block) { u32 srbm_soft_reset = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS2); if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index aa637541da584..69fba087e09c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -710,7 +710,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); if (ring->use_pollmem) { - /*wptr polling is not enogh fast, directly clean the wptr register */ + /* wptr polling is not fast enough, directly clear the wptr register */ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, @@ -1080,9 +1080,9 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static int sdma_v3_0_early_init(void *handle) +static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; switch (adev->asic_type) { @@ -1106,11 +1106,11 @@ static int sdma_v3_0_early_init(void *handle) return 0; } -static int sdma_v3_0_sw_init(void *handle) +static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, @@ -1152,9 +1152,9 @@ static int sdma_v3_0_sw_init(void *handle) return r; } -static int sdma_v3_0_sw_fini(void *handle) +static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -1164,10 +1164,10 @@ static int sdma_v3_0_sw_fini(void *handle) return 0; } -static int sdma_v3_0_hw_init(void *handle) +static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; sdma_v3_0_init_golden_registers(adev); @@ -1178,9 +1178,9 @@ static int sdma_v3_0_hw_init(void *handle) return r; } -static int sdma_v3_0_hw_fini(void *handle) +static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); @@ -1188,18 +1188,14 @@ static int sdma_v3_0_hw_fini(void *handle) return 0; } -static int sdma_v3_0_suspend(void *handle) +static int sdma_v3_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v3_0_hw_fini(adev); + return sdma_v3_0_hw_fini(ip_block); } -static int sdma_v3_0_resume(void *handle) +static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v3_0_hw_init(adev); + return
sdma_v3_0_hw_init(ip_block); } static bool sdma_v3_0_is_idle(void *handle) @@ -1214,11 +1210,11 @@ static bool sdma_v3_0_is_idle(void *handle) return true; } -static int sdma_v3_0_wait_for_idle(void *handle) +static int sdma_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -1231,9 +1227,9 @@ static int sdma_v3_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static bool sdma_v3_0_check_soft_reset(void *handle) +static bool sdma_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS2); @@ -1252,9 +1248,9 @@ static bool sdma_v3_0_check_soft_reset(void *handle) } } -static int sdma_v3_0_pre_soft_reset(void *handle) +static int sdma_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; if (!adev->sdma.srbm_soft_reset) @@ -1271,9 +1267,9 @@ static int sdma_v3_0_pre_soft_reset(void *handle) return 0; } -static int sdma_v3_0_post_soft_reset(void *handle) +static int sdma_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; if (!adev->sdma.srbm_soft_reset) @@ -1290,9 +1286,9 @@ static int sdma_v3_0_post_soft_reset(void *handle) return 0; } -static int sdma_v3_0_soft_reset(void *handle) +static int sdma_v3_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 23ef4eb36b407..ccf0d531776d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1068,6 +1068,7 @@ static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) /* Set ring buffer size in dwords */ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); + barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); @@ -1751,9 +1752,9 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) } } -static int sdma_v4_0_early_init(void *handle) +static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = sdma_v4_0_init_microcode(adev); @@ -1780,9 +1781,9 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); -static int sdma_v4_0_late_init(void *handle) +static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; sdma_v4_0_setup_ulv(adev); @@ -1792,11 +1793,11 @@ static int sdma_v4_0_late_init(void *handle) return 0; } -static int sdma_v4_0_sw_init(void *handle) +static int 
sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); uint32_t *ptr; @@ -1929,9 +1930,9 @@ static int sdma_v4_0_sw_init(void *handle) return r; } -static int sdma_v4_0_sw_fini(void *handle) +static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1951,9 +1952,9 @@ static int sdma_v4_0_sw_fini(void *handle) return 0; } -static int sdma_v4_0_hw_init(void *handle) +static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); @@ -1964,9 +1965,9 @@ static int sdma_v4_0_hw_init(void *handle) return sdma_v4_0_start(adev); } -static int sdma_v4_0_hw_fini(void *handle) +static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; if (amdgpu_sriov_vf(adev)) @@ -1988,9 +1989,9 @@ static int sdma_v4_0_hw_fini(void *handle) return 0; } -static int sdma_v4_0_suspend(void *handle) +static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* SMU saves SDMA state for us */ if (adev->in_s0ix) { @@ -1998,12 +1999,12 @@ static int sdma_v4_0_suspend(void *handle) return 0; } - return sdma_v4_0_hw_fini(adev); + return sdma_v4_0_hw_fini(ip_block); } -static int sdma_v4_0_resume(void *handle) +static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* SMU restores SDMA state for us */ if (adev->in_s0ix) { @@ -2012,7 +2013,7 @@ static int sdma_v4_0_resume(void *handle) return 0; } - return sdma_v4_0_hw_init(adev); + return sdma_v4_0_hw_init(ip_block); } static bool sdma_v4_0_is_idle(void *handle) @@ -2030,11 +2031,11 @@ static bool sdma_v4_0_is_idle(void *handle) return true; } -static int sdma_v4_0_wait_for_idle(void *handle) +static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i, j; u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { for (j = 0; j < adev->sdma.num_instances; j++) { @@ -2049,7 +2050,7 @@ static int sdma_v4_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v4_0_soft_reset(void *handle) +static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ @@ -2170,7 +2171,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, instance = sdma_v4_0_irq_id_to_seq(entry->client_id); if (instance < 0 || instance >= adev->sdma.num_instances) { - dev_err(adev->dev, "sdma instance invalid %d\n", instance); + dev_err_ratelimited(adev->dev, "sdma instance invalid %d\n", instance); return -EINVAL; } @@ -2350,9 +2351,9 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } -static void sdma_v4_0_print_ip_state(void 
*handle, struct drm_printer *p) +static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); uint32_t instance_offset; @@ -2371,9 +2372,9 @@ static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v4_0_dump_ip_state(void *handle) +static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c77889040760a..9c7cea0890c98 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1290,9 +1290,9 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) } } -static int sdma_v4_4_2_early_init(void *handle) +static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = sdma_v4_4_2_init_microcode(adev); @@ -1318,9 +1318,9 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); #endif -static int sdma_v4_4_2_late_init(void *handle) +static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; #if 0 struct ras_ih_if ih_info = { .cb = sdma_v4_4_2_process_ras_data_cb, @@ -1332,11 +1332,11 @@ static int sdma_v4_4_2_late_init(void *handle) return 0; } -static int sdma_v4_4_2_sw_init(void *handle) +static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 aid_id; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); uint32_t *ptr; @@ -1445,9 +1445,9 @@ static int sdma_v4_4_2_sw_init(void *handle) return r; } -static int sdma_v4_4_2_sw_fini(void *handle) +static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1467,10 +1467,10 @@ static int sdma_v4_4_2_sw_fini(void *handle) return 0; } -static int sdma_v4_4_2_hw_init(void *handle) +static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t inst_mask; inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); @@ -1482,9 +1482,9 @@ static int sdma_v4_4_2_hw_init(void *handle) return r; } -static int sdma_v4_4_2_hw_fini(void *handle) +static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t inst_mask; int i; @@ -1508,21 +1508,19 @@ static int sdma_v4_4_2_hw_fini(void *handle) static int sdma_v4_4_2_set_clockgating_state(void *handle, enum amd_clockgating_state state); -static int sdma_v4_4_2_suspend(void *handle) +static int sdma_v4_4_2_suspend(struct 
amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_in_reset(adev)) sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); - return sdma_v4_4_2_hw_fini(adev); + return sdma_v4_4_2_hw_fini(ip_block); } -static int sdma_v4_4_2_resume(void *handle) +static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v4_4_2_hw_init(adev); + return sdma_v4_4_2_hw_init(ip_block); } static bool sdma_v4_4_2_is_idle(void *handle) @@ -1540,11 +1538,11 @@ static bool sdma_v4_4_2_is_idle(void *handle) return true; } -static int sdma_v4_4_2_wait_for_idle(void *handle) +static int sdma_v4_4_2_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i, j; u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { for (j = 0; j < adev->sdma.num_instances; j++) { @@ -1559,7 +1557,7 @@ static int sdma_v4_4_2_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v4_4_2_soft_reset(void *handle) +static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ @@ -1857,9 +1855,9 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } -static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) +static void sdma_v4_4_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); uint32_t instance_offset; @@ -1878,9 +1876,9 @@ static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v4_4_2_dump_ip_state(void *handle) +static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3e48ea38385de..6a675daf56202 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -705,14 +705,16 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v5_0_gfx_resume - setup and start the async dma engines + * sdma_v5_0_gfx_resume_instance - start/restart a single sdma engine instance * * @adev: amdgpu_device pointer + * @i: instance index + * @restore: whether to restore the saved wptr and rptr on restart * - * Set up the gfx DMA ring buffers and enable them (NAVI10). - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffer for one instance and enable it. On restart, the saved wptr and rptr are restored. + * Returns 0 for success, error for failure.
*/ -static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) +static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; @@ -722,142 +724,163 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) u32 temp; u32 wptr_poll_cntl; u64 wptr_gpu_addr; - int i, r; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; + ring = &adev->sdma.instance[i].ring; - if (!amdgpu_sriov_vf(adev)) - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + if (!amdgpu_sriov_vf(adev)) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); - - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, - mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, - SDMA0_GFX_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), - wptr_poll_cntl); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, 
i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), - ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), - ring->gpu_addr >> 40); - + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + wptr_poll_cntl); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), + ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), + ring->gpu_addr >> 40); + + if (!restore) ring->wptr = 0; - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } - doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, - mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, + mmSDMA0_GFX_DOORBELL_OFFSET)); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), - doorbell_offset); + if (ring->use_doorbell) { + doorbell = 
REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), + doorbell_offset); - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, 20); + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, 20); - if (amdgpu_sriov_vf(adev)) - sdma_v5_0_ring_set_wptr(ring); + if (amdgpu_sriov_vf(adev)) + sdma_v5_0_ring_set_wptr(ring); - /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); - if (!amdgpu_sriov_vf(adev)) { - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - - /* enable MCBP */ - temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); - - /* Set up RESP_MODE to non-copy addresses */ - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); - - /* program default cache read and write policy */ - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - temp &= 0xFF0FFF; - temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); - } + if (!amdgpu_sriov_vf(adev)) { + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + } - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); - } + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 
0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); + } - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v5_0_ctx_switch_enable(adev, true); - sdma_v5_0_enable(adev, true); - } + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */ + sdma_v5_0_ctx_switch_enable(adev, true); + sdma_v5_0_enable(adev, true); + } - r = amdgpu_ring_test_helper(ring); + return amdgpu_ring_test_helper(ring); +} + +/** + * sdma_v5_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v5_0_gfx_resume_instance(adev, i, false); if (r) return r; } @@ -1366,9 +1389,9 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } -static int sdma_v5_0_early_init(void *handle) +static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = sdma_v5_0_init_microcode(adev); @@ -1385,11 +1408,11 @@ static int sdma_v5_0_early_init(void *handle) } -static int sdma_v5_0_sw_init(void *handle) +static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); uint32_t *ptr; @@ -1439,9 +1462,9 @@ static int sdma_v5_0_sw_init(void *handle) return r; } -static int sdma_v5_0_sw_fini(void *handle) +static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -1454,10 +1477,10 @@ static int sdma_v5_0_sw_fini(void *handle) return 0; } -static int sdma_v5_0_hw_init(void *handle) +static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; sdma_v5_0_init_golden_registers(adev); @@ -1466,9 +1489,9 @@ static int sdma_v5_0_hw_init(void
*handle) return r; } -static int sdma_v5_0_hw_fini(void *handle) +static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1479,18 +1502,14 @@ static int sdma_v5_0_hw_fini(void *handle) return 0; } -static int sdma_v5_0_suspend(void *handle) +static int sdma_v5_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v5_0_hw_fini(adev); + return sdma_v5_0_hw_fini(ip_block); } -static int sdma_v5_0_resume(void *handle) +static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v5_0_hw_init(adev); + return sdma_v5_0_hw_init(ip_block); } static bool sdma_v5_0_is_idle(void *handle) @@ -1508,11 +1527,11 @@ static bool sdma_v5_0_is_idle(void *handle) return true; } -static int sdma_v5_0_wait_for_idle(void *handle) +static int sdma_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 sdma0, sdma1; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); @@ -1525,13 +1544,100 @@ static int sdma_v5_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int sdma_v5_0_soft_reset(void *handle) +static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ return 0; } +static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, j, r; + u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + /* stop queue */ + ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */ + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + + for (j = 0; j < adev->usec_timeout; j++) { + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1) + break; + udelay(1); + } + + /* if the FROZEN ack was not received, check that the sdma copy engine is fully idle */ + if (j == adev->usec_timeout) { + stat1_reg = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG)); + if ((stat1_reg & 0x3FF) != 0x3FF) { + DRM_ERROR("cannot soft reset as sdma not idle\n"); + r = -ETIMEDOUT; + goto err0; + } + } + + f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i,
mmSDMA0_F32_CNTL), f32_cntl); + + cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); + + /* soft reset SDMA_GFX_PREEMPT.IB_PREEMPT = 0 mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 = 1 */ + preempt = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); + preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); + + soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; + + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); + + udelay(50); + + soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); + + /* unfreeze*/ + freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + + r = sdma_v5_0_gfx_resume_instance(adev, i, true); + +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -1778,9 +1884,9 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } -static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p) +static void sdma_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); uint32_t instance_offset; @@ -1799,9 +1905,9 @@ static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v5_0_dump_ip_state(void *handle) +static void sdma_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); @@ -1874,6 +1980,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, + .reset = sdma_v5_0_reset_queue, }; static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index d740255edf5af..e1413ccaf7e41 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -225,14 +225,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). 
- */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -520,14 +522,17 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v5_2_gfx_resume - setup and start the async dma engines + * sdma_v5_2_gfx_resume_instance - start/restart a single sdma engine instance * * @adev: amdgpu_device pointer + * @i: instance index + * @restore: whether to restore the saved wptr and rptr on restart * - * Set up the gfx DMA ring buffers and enable them. - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffer for one instance and enable it. On restart, the saved wptr and rptr are restored. + * Returns 0 for success, error for failure. */ -static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) + +static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; @@ -537,139 +542,161 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) u32 temp; u32 wptr_poll_cntl; u64 wptr_gpu_addr; - int i, r; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; + ring = &adev->sdma.instance[i].ring; - if (!amdgpu_sriov_vf(adev)) - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + if (!amdgpu_sriov_vf(adev)) + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + } - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, - mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, - SDMA0_GFX_RB_WPTR_POLL_CNTL, - F32_POLL_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), - wptr_poll_cntl); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); - + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + wptr_poll_cntl); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + + if (!restore) ring->wptr = 0; - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, 
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); - } + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } - doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); - if (amdgpu_sriov_vf(adev)) - sdma_v5_2_ring_set_wptr(ring); + if (amdgpu_sriov_vf(adev)) + sdma_v5_2_ring_set_wptr(ring); - /* set minor_ptr_update to 0 after wptr programed */ + /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); - /* SRIOV VF has no control of any of registers below */ - if (!amdgpu_sriov_vf(adev)) { - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - - /* enable MCBP */ - temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); - - /* Set up RESP_MODE to non-copy addresses */ - temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); - - /* program default cache read and write policy */ - temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - temp &= 0xFF0FFF; - temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 
14) | - SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); - - /* unhalt engine */ - temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); - } + /* SRIOV VF has no control of any of registers below */ + if (!amdgpu_sriov_vf(adev)) { + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | + SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + + /* unhalt engine */ + temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); + } - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v5_2_ctx_switch_enable(adev, true); - sdma_v5_2_enable(adev, true); - } + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */ + sdma_v5_2_ctx_switch_enable(adev, true); + sdma_v5_2_enable(adev, true); + } + + return amdgpu_ring_test_helper(ring); +} + +/** + * sdma_v5_2_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure.
+ */ +static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; - r = amdgpu_ring_test_helper(ring); + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v5_2_gfx_resume_instance(adev, i, false); if (r) return r; } @@ -734,9 +761,9 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) return 0; } -static int sdma_v5_2_soft_reset(void *handle) +static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 grbm_soft_reset; u32 tmp; int i; @@ -776,6 +803,7 @@ static int sdma_v5_2_soft_reset(void *handle) static int sdma_v5_2_start(struct amdgpu_device *adev) { int r = 0; + struct amdgpu_ip_block *ip_block; if (amdgpu_sriov_vf(adev)) { sdma_v5_2_ctx_switch_enable(adev, false); @@ -796,7 +824,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev) msleep(1000); } - sdma_v5_2_soft_reset(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); + if (!ip_block) + return -EINVAL; + + sdma_v5_2_soft_reset(ip_block); /* unhalt the MEs */ sdma_v5_2_enable(adev, true); /* enable sdma ring preemption */ @@ -1178,7 +1210,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); + + /* Update the PD address for this VMID. */ + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + + /* Trigger invalidation. 
*/ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) | + SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) | + SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f)); + amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, 0xFFFFFFFF); + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) | + SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F)); } static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1214,9 +1267,9 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } -static int sdma_v5_2_early_init(void *handle) +static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_sdma_init_microcode(adev, 0, true); @@ -1266,11 +1319,11 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num) return -EINVAL; } -static int sdma_v5_2_sw_init(void *handle) +static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); uint32_t *ptr; @@ -1314,9 +1367,9 @@ static int sdma_v5_2_sw_init(void *handle) return r; } -static int sdma_v5_2_sw_fini(void *handle) +static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -1329,16 +1382,16 @@ static int sdma_v5_2_sw_fini(void *handle) return 0; } -static int sdma_v5_2_hw_init(void *handle) +static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return sdma_v5_2_start(adev); } -static int sdma_v5_2_hw_fini(void *handle) +static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1349,18 +1402,14 @@ static int sdma_v5_2_hw_fini(void *handle) return 0; } -static int sdma_v5_2_suspend(void *handle) +static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v5_2_hw_fini(adev); + return sdma_v5_2_hw_fini(ip_block); } -static int sdma_v5_2_resume(void *handle) +static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v5_2_hw_init(adev); + return sdma_v5_2_hw_init(ip_block); } static bool sdma_v5_2_is_idle(void *handle) @@ -1378,11 +1427,11 @@ static bool sdma_v5_2_is_idle(void *handle) return true; } -static int sdma_v5_2_wait_for_idle(void *handle) +static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 sdma0, sdma1, sdma2, sdma3; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); @@ -1397,6 +1446,96 @@ static int sdma_v5_2_wait_for_idle(void *handle) return -ETIMEDOUT; } 
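The sdma_v5_2_reset_queue() function added in the next hunk mirrors the v5.0 variant above: stop the queue, freeze the engine, pulse the per-instance GRBM soft-reset bit, unfreeze, then re-program the queue from the saved ring state. As a rough orientation aid, the control flow condenses to the sketch below. This is a simplification of the hunk that follows, not a drop-in replacement: the instance lookup, RLC safe-mode bracketing, HALT/UTC_L1 handling and the idle fallback check are elided, and the helper name sdma_v5_2_reset_queue_sketch is purely illustrative.

	/* Minimal sketch of the per-queue reset sequence, assuming the
	 * instance index i has already been resolved from the ring and
	 * RLC safe mode is held by the caller.
	 */
	static int sdma_v5_2_reset_queue_sketch(struct amdgpu_device *adev, int i)
	{
		u32 freeze, soft_reset;

		/* freeze the engine; the full version then polls for the FROZEN ack */
		freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
		freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);

		/* pulse the per-instance SDMA soft-reset bit in GRBM_SOFT_RESET */
		soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i;
		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset);
		udelay(50);
		soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i);
		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset);

		/* unfreeze, then re-program the queue with restore == true so the
		 * saved wptr/rptr survive the reset instead of starting from zero
		 */
		freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);

		return sdma_v5_2_gfx_resume_instance(adev, i, true);
	}

The full implementation below is what gets wired into sdma_v5_2_ring_funcs as the .reset callback later in this patch, allowing a single hung queue to be recovered without a full engine or GPU reset.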
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, j, r; + u32 rb_cntl, ib_cntl, f32_cntl, freeze, cntl, preempt, soft_reset, stat1_reg; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + /* stop queue */ + ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + rb_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* engine stop: set SDMAx_F32_CNTL.HALT to 1 and the SDMAx_FREEZE freeze bit to 1 */ + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + + for (j = 0; j < adev->usec_timeout; j++) { + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + + if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1) + break; + udelay(1); + } + + if (j == adev->usec_timeout) { + stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG)); + if ((stat1_reg & 0x3FF) != 0x3FF) { + DRM_ERROR("cannot soft reset as sdma not idle\n"); + r = -ETIMEDOUT; + goto err0; + } + } + + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + + cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl); + + /* soft reset: set SDMA_GFX_PREEMPT.IB_PREEMPT to 0 and mmGRBM_SOFT_RESET.SOFT_RESET_SDMA0/1 to 1 */ + preempt = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT)); + preempt = REG_SET_FIELD(preempt, SDMA0_GFX_PREEMPT, IB_PREEMPT, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_PREEMPT), preempt); + + soft_reset = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + soft_reset |= 1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i; + + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); + + udelay(50); + + soft_reset &= ~(1 << GRBM_SOFT_RESET__SOFT_RESET_SDMA0__SHIFT << i); + + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, soft_reset); + + /* unfreeze and unhalt */ + freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE)); + freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze); + + r = sdma_v5_2_gfx_resume_instance(adev, i, true); + +err0: + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -1734,9 +1873,9 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_off_ctrl(adev, true); } -static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) +static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct
amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); uint32_t instance_offset; @@ -1755,9 +1894,9 @@ static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v5_2_dump_ip_state(void *handle) +static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); @@ -1832,6 +1971,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_2_ring_init_cond_exec, .preempt_ib = sdma_v5_2_ring_preempt_ib, + .reset = sdma_v5_2_reset_queue, }; static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 208a1fa9d4e7f..4b33bd6b776db 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -469,14 +469,16 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v6_0_gfx_resume - setup and start the async dma engines + * sdma_v6_0_gfx_resume_instance - start/restart a certain sdma engine * * @adev: amdgpu_device pointer + * @i: instance + * @restore: used to restore wptr when restarting * - * Set up the gfx DMA ring buffers and enable them. - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffers and enable them. On restart, the wptr and rptr are restored. + * Returns 0 for success, error for failure. */ -static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) +static int sdma_v6_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; @@ -485,132 +487,152 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) u32 doorbell_offset; u32 temp; u64 wptr_gpu_addr; - int i, r; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - - if (!amdgpu_sriov_vf(adev)) - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + ring = &adev->sdma.instance[i].ring; + if (!amdgpu_sriov_vf(adev)) + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + rb_cntl = REG_SET_FIELD(rb_cntl,
SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0); + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1); + + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + if (!restore) + ring->wptr = 0; - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1); + /* before programming wptr to a smaller value, minor_ptr_update must be set first */ + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + if (!amdgpu_sriov_vf(adev)) { /* only
bare-metal uses register writes for wptr */ + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } - ring->wptr = 0; + doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + if (i == 0) + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); + if (amdgpu_sriov_vf(adev)) + sdma_v6_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr is programmed */ + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | + SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC,
sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - if (i == 0) - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - - if (amdgpu_sriov_vf(adev)) - sdma_v6_0_ring_set_wptr(ring); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); - - /* Set up sdma hang watchdog */ - temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); - /* 100ms per unit */ - temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, - max(adev->usec_timeout/100000, 1)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); - - /* Set up RESP_MODE to non-copy addresses */ - temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp); - - /* program default cache read and write policy */ - temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - temp &= 0xFF0FFF; - temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | - SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp); - - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp); - } + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp); + } - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); + ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); - if (amdgpu_sriov_vf(adev)) - sdma_v6_0_enable(adev, true); + if 
(amdgpu_sriov_vf(adev)) + sdma_v6_0_enable(adev, true); + + return amdgpu_ring_test_helper(ring); +} - r = amdgpu_ring_test_helper(ring); +/** + * sdma_v6_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. + */ +static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v6_0_gfx_resume_instance(adev, i, false); if (r) return r; } @@ -733,9 +755,9 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev) return 0; } -static int sdma_v6_0_soft_reset(void *handle) +static int sdma_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp; int i; @@ -769,9 +791,9 @@ static int sdma_v6_0_soft_reset(void *handle) return sdma_v6_0_start(adev); } -static bool sdma_v6_0_check_soft_reset(void *handle) +static bool sdma_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; long tmo = msecs_to_jiffies(1000); @@ -1272,9 +1294,9 @@ static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev) } } -static int sdma_v6_0_early_init(void *handle) +static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_sdma_init_microcode(adev, 0, true); @@ -1291,11 +1313,11 @@ static int sdma_v6_0_early_init(void *handle) return 0; } -static int sdma_v6_0_sw_init(void *handle) +static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); uint32_t *ptr; @@ -1343,9 +1365,9 @@ static int sdma_v6_0_sw_init(void *handle) return r; } -static int sdma_v6_0_sw_fini(void *handle) +static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -1358,16 +1380,16 @@ static int sdma_v6_0_sw_fini(void *handle) return 0; } -static int sdma_v6_0_hw_init(void *handle) +static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return sdma_v6_0_start(adev); } -static int sdma_v6_0_hw_fini(void *handle) +static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1378,18 +1400,14 @@ static int sdma_v6_0_hw_fini(void *handle) return 0; } -static int sdma_v6_0_suspend(void *handle) +static int sdma_v6_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v6_0_hw_fini(adev); + return sdma_v6_0_hw_fini(ip_block); } -static int sdma_v6_0_resume(void *handle) +static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v6_0_hw_init(adev); + 
return sdma_v6_0_hw_init(ip_block); } static bool sdma_v6_0_is_idle(void *handle) @@ -1407,11 +1425,11 @@ static bool sdma_v6_0_is_idle(void *handle) return true; } -static int sdma_v6_0_wait_for_idle(void *handle) +static int sdma_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 sdma0, sdma1; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG)); @@ -1469,6 +1487,31 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } +static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + if (r) + return r; + + return sdma_v6_0_gfx_resume_instance(adev, i, true); +} + static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1556,9 +1599,9 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } -static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) +static void sdma_v6_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); uint32_t instance_offset; @@ -1577,9 +1620,9 @@ static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v6_0_dump_ip_state(void *handle) +static void sdma_v6_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); @@ -1652,6 +1695,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v6_0_ring_init_cond_exec, .preempt_ib = sdma_v6_0_ring_preempt_ib, + .reset = sdma_v6_0_reset_queue, }; static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index cfd8e183ad503..d2ce6b6a7ff64 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -51,6 +51,12 @@ MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +/* define for compression field for sdma7 */ +#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask 0x00000001 +#define SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift 16 +#define SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_compress_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_compress_shift) + static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), @@ -747,9 +753,9 @@ static int sdma_v7_0_load_microcode(struct amdgpu_device *adev) return 0; } -static int
sdma_v7_0_soft_reset(void *handle) +static int sdma_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp; int i; @@ -783,9 +789,9 @@ static int sdma_v7_0_soft_reset(void *handle) return sdma_v7_0_start(adev); } -static bool sdma_v7_0_check_soft_reset(void *handle) +static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; long tmo = msecs_to_jiffies(1000); @@ -1080,13 +1086,16 @@ static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib, unsigned bytes = count * 8; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); + ib->ptr[ib->length_dw++] = bytes - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src); ib->ptr[ib->length_dw++] = upper_32_bits(src); ib->ptr[ib->length_dw++] = lower_32_bits(pe); ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = 0; } @@ -1250,9 +1259,9 @@ static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } -static int sdma_v7_0_early_init(void *handle) +static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_sdma_init_microcode(adev, 0, true); @@ -1270,11 +1279,11 @@ static int sdma_v7_0_early_init(void *handle) return 0; } -static int sdma_v7_0_sw_init(void *handle) +static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); uint32_t *ptr; @@ -1317,9 +1326,9 @@ static int sdma_v7_0_sw_init(void *handle) return r; } -static int sdma_v7_0_sw_fini(void *handle) +static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -1335,16 +1344,16 @@ static int sdma_v7_0_sw_fini(void *handle) return 0; } -static int sdma_v7_0_hw_init(void *handle) +static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return sdma_v7_0_start(adev); } -static int sdma_v7_0_hw_fini(void *handle) +static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1355,18 +1364,14 @@ static int sdma_v7_0_hw_fini(void *handle) return 0; } -static int sdma_v7_0_suspend(void *handle) +static int sdma_v7_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return sdma_v7_0_hw_fini(adev); + return sdma_v7_0_hw_fini(ip_block); } -static int sdma_v7_0_resume(void *handle) +static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; - - return sdma_v7_0_hw_init(adev); + return sdma_v7_0_hw_init(ip_block); } static bool sdma_v7_0_is_idle(void *handle) @@ -1384,11 +1389,11 @@ static bool sdma_v7_0_is_idle(void *handle) return true; } -static int sdma_v7_0_wait_for_idle(void *handle) +static int sdma_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 sdma0, sdma1; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { sdma0 = RREG32(sdma_v7_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG)); @@ -1535,9 +1540,9 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } -static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) +static void sdma_v7_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); uint32_t instance_offset; @@ -1556,9 +1561,9 @@ static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) } } -static void sdma_v7_0_dump_ip_state(void *handle) +static void sdma_v7_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, j; uint32_t instance_offset; uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); @@ -1721,7 +1726,8 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, uint64_t dst_offset, uint32_t byte_count) { - ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL) | + SDMA_PKT_CONSTANT_FILL_HEADER_COMPRESS(1); ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); ib->ptr[ib->length_dw++] = src_data; @@ -1744,7 +1750,7 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = { - .copy_pte_num_dw = 7, + .copy_pte_num_dw = 8, .copy_pte = sdma_v7_0_vm_copy_pte, .write_pte = sdma_v7_0_vm_write_pte, .set_pte_pde = sdma_v7_0_vm_set_pte_pde, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 85235470e872c..b9934661a92ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2022,9 +2022,9 @@ static uint32_t si_get_rev_id(struct amdgpu_device *adev) >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT; } -static int si_common_early_init(void *handle) +static int si_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->smc_rreg = &si_smc_rreg; adev->smc_wreg = &si_smc_wreg; @@ -2148,12 +2148,12 @@ static int si_common_early_init(void *handle) return 0; } -static int si_common_sw_init(void *handle) +static int si_common_sw_init(struct amdgpu_ip_block *ip_block) { return 0; } -static int si_common_sw_fini(void *handle) +static int si_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } @@ -2633,9 +2633,9 @@ static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev) pcie_set_readrq(adev->pdev, 512); } -static int si_common_hw_init(void *handle) +static int si_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev 
= ip_block->adev; si_fix_pci_max_read_req_size(adev); si_init_golden_registers(adev); @@ -2645,23 +2645,19 @@ static int si_common_hw_init(void *handle) return 0; } -static int si_common_hw_fini(void *handle) +static int si_common_hw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int si_common_suspend(void *handle) +static int si_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_common_hw_fini(adev); + return si_common_hw_fini(ip_block); } -static int si_common_resume(void *handle) +static int si_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_common_hw_init(adev); + return si_common_hw_init(ip_block); } static bool si_common_is_idle(void *handle) @@ -2669,12 +2665,12 @@ static bool si_common_is_idle(void *handle) return true; } -static int si_common_wait_for_idle(void *handle) +static int si_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int si_common_soft_reset(void *handle) +static int si_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 11db5b7558321..d44483ed3363e 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -457,9 +457,9 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static int si_dma_early_init(void *handle) +static int si_dma_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->sdma.num_instances = 2; @@ -471,11 +471,11 @@ static int si_dma_early_init(void *handle) return 0; } -static int si_dma_sw_init(void *handle) +static int si_dma_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* DMA0 trap event */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, @@ -506,9 +506,9 @@ static int si_dma_sw_init(void *handle) return r; } -static int si_dma_sw_fini(void *handle) +static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) @@ -517,39 +517,34 @@ static int si_dma_sw_fini(void *handle) return 0; } -static int si_dma_hw_init(void *handle) +static int si_dma_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return si_dma_start(adev); } -static int si_dma_hw_fini(void *handle) +static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - si_dma_stop(adev); + si_dma_stop(ip_block->adev); return 0; } -static int si_dma_suspend(void *handle) +static int si_dma_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_dma_hw_fini(adev); + return si_dma_hw_fini(ip_block); } -static int si_dma_resume(void *handle) +static int si_dma_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_dma_hw_init(adev); + return si_dma_hw_init(ip_block); } static bool si_dma_is_idle(void 
*handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 tmp = RREG32(SRBM_STATUS2); if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK)) @@ -558,20 +553,20 @@ static bool si_dma_is_idle(void *handle) return true; } -static int si_dma_wait_for_idle(void *handle) +static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (si_dma_is_idle(handle)) + if (si_dma_is_idle(adev)) return 0; udelay(1); } return -ETIMEDOUT; } -static int si_dma_soft_reset(void *handle) +static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block) { DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 5237395e4fab5..b018a3b904019 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -156,19 +156,19 @@ static void si_ih_set_rptr(struct amdgpu_device *adev, WREG32(IH_RB_RPTR, ih->rptr); } -static int si_ih_early_init(void *handle) +static int si_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; si_ih_set_interrupt_funcs(adev); return 0; } -static int si_ih_sw_init(void *handle) +static int si_ih_sw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) @@ -177,43 +177,37 @@ static int si_ih_sw_init(void *handle) return amdgpu_irq_init(adev); } -static int si_ih_sw_fini(void *handle) +static int si_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int si_ih_hw_init(void *handle) +static int si_ih_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return si_ih_irq_init(adev); } -static int si_ih_hw_fini(void *handle) +static int si_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - si_ih_irq_disable(adev); + si_ih_irq_disable(ip_block->adev); return 0; } -static int si_ih_suspend(void *handle) +static int si_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_ih_hw_fini(adev); + return si_ih_hw_fini(ip_block); } -static int si_ih_resume(void *handle) +static int si_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return si_ih_hw_init(adev); + return si_ih_hw_init(ip_block); } static bool si_ih_is_idle(void *handle) @@ -227,22 +221,22 @@ static bool si_ih_is_idle(void *handle) return true; } -static int si_ih_wait_for_idle(void *handle) +static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (si_ih_is_idle(handle)) + if (si_ih_is_idle(adev)) return 0; udelay(1); } return -ETIMEDOUT; } -static int si_ih_soft_reset(void *handle) +static int si_ih_soft_reset(struct amdgpu_ip_block 
*ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(SRBM_STATUS); diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 481217c32d853..475b7df3a9089 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -81,7 +81,7 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_SDMA)) continue; - r = adev->ip_blocks[i].version->funcs->suspend(adev); + r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, @@ -175,7 +175,7 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "resume of IP block <%s> failed %d\n", @@ -193,7 +193,7 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) continue; - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "resume of IP block <%s> failed %d\n", @@ -213,7 +213,7 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init( - (void *)adev); + &adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "late_init of IP block <%s> failed %d after reset\n", diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index dd2d66090d237..2de46087444c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -636,17 +636,23 @@ static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) mutex_unlock(&smu_i2c->mutex); } +#if defined(HAVE_I2C_LOCK_OPERATIONS_STRUCT) static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { .lock_bus = lock_bus, .trylock_bus = trylock_bus, .unlock_bus = unlock_bus, }; +#endif static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msg, int num) { int i, ret; u16 addr, dir; +#if !defined(HAVE_I2C_LOCK_OPERATIONS_STRUCT) + lock_bus(i2c_adap, 0); +#endif + smu_v11_0_i2c_init(i2c_adap); @@ -705,6 +711,10 @@ static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap, } smu_v11_0_i2c_fini(i2c_adap); + +#if !defined(HAVE_I2C_LOCK_OPERATIONS_STRUCT) + unlock_bus(i2c_adap, 0); +#endif return num; } @@ -736,7 +746,9 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) control->dev.parent = &adev->pdev->dev; control->algo = &smu_v11_0_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0"); +#if defined(HAVE_I2C_LOCK_OPERATIONS_STRUCT) control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; +#endif control->quirks = &smu_v11_0_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index 0af648931df58..5ea9090b5040a 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -80,7 +80,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_MES)) continue; - r = 
adev->ip_blocks[i].version->funcs->suspend(adev); + r = adev->ip_blocks[i].version->funcs->suspend(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, @@ -186,7 +186,7 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) continue; - r = adev->ip_blocks[i].version->funcs->resume(adev); + r = adev->ip_blocks[i].version->funcs->resume(&adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "resume of IP block <%s> failed %d\n", @@ -208,7 +208,7 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init( - (void *)adev); + &adev->ip_blocks[i]); if (r) { dev_err(adev->dev, "late_init of IP block <%s> failed %d after reset\n", diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index e4e30b9d481b4..c04fdd2d5b389 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f { u32 data; - /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */ + /* CGTT_ROM_CLK_CTRL0 is not available for APUs */ if (adev->flags & AMD_IS_APU) return; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8d16dacdc1720..6fcdeb265a22c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -583,15 +583,15 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. + * 1) S3 suspend abort and TOS already launched. */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) + if (adev->in_s3 && sol_reg) { + adev->suspend_complete = false; return true; - - return false; + } else { + adev->suspend_complete = true; + return false; + } } static int soc15_asic_reset(struct amdgpu_device *adev) @@ -829,6 +829,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) if (adev->asic_type == CHIP_RENOIR) return true; + if (amdgpu_gmc_need_reset_on_init(adev)) + return true; + if (amdgpu_psp_tos_reload_needed(adev)) + return true; /* Just return false for soc15 GPUs. Reset does not seem to * be necessary. 
*/ @@ -929,9 +933,9 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_reg_state = &aqua_vanjaram_get_reg_state, }; -static int soc15_common_early_init(void *handle) +static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; @@ -1198,9 +1202,9 @@ static int soc15_common_early_init(void *handle) return 0; } -static int soc15_common_late_init(void *handle) +static int soc15_common_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); @@ -1213,9 +1217,9 @@ static int soc15_common_late_init(void *handle) return 0; } -static int soc15_common_sw_init(void *handle) +static int soc15_common_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); @@ -1227,9 +1231,9 @@ static int soc15_common_sw_init(void *handle) return 0; } -static int soc15_common_sw_fini(void *handle) +static int soc15_common_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->df.funcs && adev->df.funcs->sw_fini) @@ -1251,9 +1255,9 @@ static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev) } } -static int soc15_common_hw_init(void *handle) +static int soc15_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* enable aspm */ soc15_program_aspm(adev); @@ -1280,9 +1284,9 @@ static int soc15_common_hw_init(void *handle) return 0; } -static int soc15_common_hw_fini(void *handle) +static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* Disable the doorbell aperture and selfring doorbell aperture * separately in hw_fini because soc15_enable_doorbell_aperture @@ -1295,7 +1299,12 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); + /* + * For minimal init, late_init is not called, hence RAS irqs are not + * enabled. 
+ */ if ((!amdgpu_sriov_vf(adev)) && + (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) && adev->nbio.ras_if && amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { if (adev->nbio.ras && @@ -1309,22 +1318,20 @@ static int soc15_common_hw_fini(void *handle) return 0; } -static int soc15_common_suspend(void *handle) +static int soc15_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return soc15_common_hw_fini(adev); + return soc15_common_hw_fini(ip_block); } -static int soc15_common_resume(void *handle) +static int soc15_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (soc15_need_reset_on_resume(adev)) { dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); soc15_asic_reset(adev); } - return soc15_common_hw_init(adev); + return soc15_common_hw_init(ip_block); } static bool soc15_common_is_idle(void *handle) @@ -1332,12 +1339,12 @@ static bool soc15_common_is_idle(void *handle) return true; } -static int soc15_common_wait_for_idle(void *handle) +static int soc15_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int soc15_common_soft_reset(void *handle) +static int soc15_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e74e1983da53a..b9cbeb389edc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -413,6 +413,10 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. 
RESERVED [31:0] + */ #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d30ad7d56def9..03b9bcb8eb6d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -556,9 +556,9 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = { .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; -static int soc21_common_early_init(void *handle) +static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; @@ -794,9 +794,9 @@ static int soc21_common_early_init(void *handle) return 0; } -static int soc21_common_late_init(void *handle) +static int soc21_common_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); @@ -832,9 +832,9 @@ static int soc21_common_late_init(void *handle) return 0; } -static int soc21_common_sw_init(void *handle) +static int soc21_common_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_add_irq_id(adev); @@ -842,14 +842,14 @@ static int soc21_common_sw_init(void *handle) return 0; } -static int soc21_common_sw_fini(void *handle) +static int soc21_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int soc21_common_hw_init(void *handle) +static int soc21_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* enable aspm */ soc21_program_aspm(adev); @@ -867,9 +867,9 @@ static int soc21_common_hw_init(void *handle) return 0; } -static int soc21_common_hw_fini(void *handle) +static int soc21_common_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* Disable the doorbell aperture and selfring doorbell aperture * separately in hw_fini because soc21_enable_doorbell_aperture @@ -890,11 +890,9 @@ static int soc21_common_hw_fini(void *handle) return 0; } -static int soc21_common_suspend(void *handle) +static int soc21_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return soc21_common_hw_fini(adev); + return soc21_common_hw_fini(ip_block); } static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) @@ -905,28 +903,29 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) * 1) Only reset dGPU side. * 2) S3 suspend got aborted and TOS is active. 
*/ - if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && - !adev->suspend_complete) { + if (!(adev->flags & AMD_IS_APU) && adev->in_s3) { sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); msleep(100); sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); - - return (sol_reg1 != sol_reg2); + if (sol_reg1 != sol_reg2) { + adev->suspend_complete = false; + return true; + } } return false; } -static int soc21_common_resume(void *handle) +static int soc21_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (soc21_need_reset_on_resume(adev)) { dev_info(adev->dev, "S3 suspend aborted, resetting..."); soc21_asic_reset(adev); } - return soc21_common_hw_init(adev); + return soc21_common_hw_init(ip_block); } static bool soc21_common_is_idle(void *handle) @@ -934,12 +933,12 @@ static bool soc21_common_is_idle(void *handle) return true; } -static int soc21_common_wait_for_idle(void *handle) +static int soc21_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int soc21_common_soft_reset(void *handle) +static int soc21_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index b0c3678cfb31d..b20dc81dc2574 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -250,13 +250,6 @@ static void soc24_program_aspm(struct amdgpu_device *adev) adev->nbio.funcs->program_aspm(adev); } -static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) -{ - adev->nbio.funcs->enable_doorbell_aperture(adev, enable); - adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); -} - const struct amdgpu_ip_block_version soc24_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, .major = 1, @@ -370,9 +363,9 @@ static const struct amdgpu_asic_funcs soc24_asic_funcs = { .update_umd_stable_pstate = &soc24_update_umd_stable_pstate, }; -static int soc24_common_early_init(void *handle) +static int soc24_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->set_reg_remap(adev); adev->smc_rreg = NULL; @@ -447,19 +440,24 @@ static int soc24_common_early_init(void *handle) return 0; } -static int soc24_common_late_init(void *handle) +static int soc24_common_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_get_irq(adev); + /* Enable selfring doorbell aperture late because doorbell BAR + * aperture will change if resize BAR successfully in gmc sw_init. 
+ */ + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true); + return 0; } -static int soc24_common_sw_init(void *handle) +static int soc24_common_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_add_irq_id(adev); @@ -467,14 +465,14 @@ static int soc24_common_sw_init(void *handle) return 0; } -static int soc24_common_sw_fini(void *handle) +static int soc24_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int soc24_common_hw_init(void *handle) +static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* enable aspm */ soc24_program_aspm(adev); @@ -491,17 +489,22 @@ static int soc24_common_hw_init(void *handle) adev->df.funcs->hw_init(adev); /* enable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, true); + adev->nbio.funcs->enable_doorbell_aperture(adev, true); return 0; } -static int soc24_common_hw_fini(void *handle) +static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; - /* disable the doorbell aperture */ - soc24_enable_doorbell_aperture(adev, false); + /* Disable the doorbell aperture and selfring doorbell aperture + * separately in hw_fini because soc24_enable_doorbell_aperture + * has been removed and there is no need to delay disabling + * selfring doorbell. + */ + adev->nbio.funcs->enable_doorbell_aperture(adev, false); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); if (amdgpu_sriov_vf(adev)) xgpu_nv_mailbox_put_irq(adev); @@ -509,18 +512,14 @@ static int soc24_common_hw_fini(void *handle) return 0; } -static int soc24_common_suspend(void *handle) +static int soc24_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return soc24_common_hw_fini(adev); + return soc24_common_hw_fini(ip_block); } -static int soc24_common_resume(void *handle) +static int soc24_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return soc24_common_hw_init(adev); + return soc24_common_hw_init(ip_block); } static bool soc24_common_is_idle(void *handle) @@ -528,12 +527,12 @@ static bool soc24_common_is_idle(void *handle) return true; } -static int soc24_common_wait_for_idle(void *handle) +static int soc24_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int soc24_common_soft_reset(void *handle) +static int soc24_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 24d49d813607f..45fb5140c8b71 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -283,9 +283,9 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev, } } -static int tonga_ih_early_init(void *handle) +static int tonga_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = amdgpu_irq_add_domain(adev); @@ -297,10 +297,10 @@ static int tonga_ih_early_init(void *handle) return 0; } -static int tonga_ih_sw_init(void *handle) +static int tonga_ih_sw_init(struct
amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true); if (r) @@ -314,9 +314,9 @@ static int tonga_ih_sw_init(void *handle) return r; } -static int tonga_ih_sw_fini(void *handle) +static int tonga_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); amdgpu_irq_remove_domain(adev); @@ -324,10 +324,10 @@ static int tonga_ih_sw_fini(void *handle) return 0; } -static int tonga_ih_hw_init(void *handle) +static int tonga_ih_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = tonga_ih_irq_init(adev); if (r) @@ -336,27 +336,21 @@ static int tonga_ih_hw_init(void *handle) return 0; } -static int tonga_ih_hw_fini(void *handle) +static int tonga_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - tonga_ih_irq_disable(adev); + tonga_ih_irq_disable(ip_block->adev); return 0; } -static int tonga_ih_suspend(void *handle) +static int tonga_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return tonga_ih_hw_fini(adev); + return tonga_ih_hw_fini(ip_block); } -static int tonga_ih_resume(void *handle) +static int tonga_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return tonga_ih_hw_init(adev); + return tonga_ih_hw_init(ip_block); } static bool tonga_ih_is_idle(void *handle) @@ -370,11 +364,11 @@ static bool tonga_ih_is_idle(void *handle) return true; } -static int tonga_ih_wait_for_idle(void *handle) +static int tonga_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { /* read MC_STATUS */ @@ -386,9 +380,9 @@ static int tonga_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static bool tonga_ih_check_soft_reset(void *handle) +static bool tonga_ih_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -405,29 +399,27 @@ static bool tonga_ih_check_soft_reset(void *handle) } } -static int tonga_ih_pre_soft_reset(void *handle) +static int tonga_ih_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!adev->irq.srbm_soft_reset) + if (!ip_block->adev->irq.srbm_soft_reset) return 0; - return tonga_ih_hw_fini(adev); + return tonga_ih_hw_fini(ip_block); } -static int tonga_ih_post_soft_reset(void *handle) +static int tonga_ih_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->irq.srbm_soft_reset) return 0; - return tonga_ih_hw_init(adev); + return tonga_ih_hw_init(ip_block); } -static int tonga_ih_soft_reset(void *handle) +static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 
srbm_soft_reset; if (!adev->irq.srbm_soft_reset) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 805d6662c88b6..c5540c30d1bbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -531,9 +531,9 @@ static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev) } -static int uvd_v3_1_early_init(void *handle) +static int uvd_v3_1_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->uvd.num_uvd_inst = 1; uvd_v3_1_set_ring_funcs(adev); @@ -542,10 +542,10 @@ static int uvd_v3_1_early_init(void *handle) return 0; } -static int uvd_v3_1_sw_init(void *handle) +static int uvd_v3_1_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; void *ptr; uint32_t ucode_len; @@ -580,10 +580,10 @@ static int uvd_v3_1_sw_init(void *handle) return r; } -static int uvd_v3_1_sw_fini(void *handle) +static int uvd_v3_1_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_uvd_suspend(adev); if (r) @@ -625,9 +625,9 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, * * Initialize the hardware, boot up the VCPU and do some testing */ -static int uvd_v3_1_hw_init(void *handle) +static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -692,9 +692,9 @@ static int uvd_v3_1_hw_init(void *handle) * * Stop the UVD block, mark ring as not ready any more */ -static int uvd_v3_1_hw_fini(void *handle) +static int uvd_v3_1_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -704,17 +704,17 @@ static int uvd_v3_1_hw_fini(void *handle) return 0; } -static int uvd_v3_1_prepare_suspend(void *handle) +static int uvd_v3_1_prepare_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return amdgpu_uvd_prepare_suspend(adev); } -static int uvd_v3_1_suspend(void *handle) +static int uvd_v3_1_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ -740,23 +740,22 @@ static int uvd_v3_1_suspend(void *handle) AMD_CG_STATE_GATE); } - r = uvd_v3_1_hw_fini(adev); + r = uvd_v3_1_hw_fini(ip_block); if (r) return r; return amdgpu_uvd_suspend(adev); } -static int uvd_v3_1_resume(void *handle) +static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); + r = amdgpu_uvd_resume(ip_block->adev); if (r) return r; - return uvd_v3_1_hw_init(adev); + return uvd_v3_1_hw_init(ip_block); } static bool uvd_v3_1_is_idle(void *handle) @@ -766,10 +765,10 @@ static bool uvd_v3_1_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } -static int uvd_v3_1_wait_for_idle(void *handle) 
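The tonga_ih and uvd_v3_1 hunks around this point repeat the one mechanical conversion that runs through the whole series: every amd_ip_funcs callback that took an opaque void *handle (in practice always a struct amdgpu_device *) now receives the struct amdgpu_ip_block it belongs to and reaches the device through ip_block->adev, so the cast disappears. A minimal before/after sketch; the stub types below stand in for the real amdgpu headers and are not buildable against the kernel:

#include <errno.h>

struct amdgpu_device { unsigned int usec_timeout; };
struct amdgpu_ip_block { struct amdgpu_device *adev; };

/* Old shape: the device arrives hidden behind a void pointer. */
static int demo_wait_for_idle_old(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return adev->usec_timeout ? 0 : -ETIMEDOUT;
}

/* New shape: the callback is handed the IP block that owns it. */
static int demo_wait_for_idle_new(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	return adev->usec_timeout ? 0 : -ETIMEDOUT;
}

Beyond dropping the cast, the new signature gives a callback direct access to its own per-block state instead of only the device.
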
+static int uvd_v3_1_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK)) @@ -778,9 +777,9 @@ static int uvd_v3_1_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int uvd_v3_1_soft_reset(void *handle) +static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uvd_v3_1_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 3f19c606f4de5..02e2dda638282 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -90,9 +90,9 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } -static int uvd_v4_2_early_init(void *handle) +static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->uvd.num_uvd_inst = 1; uvd_v4_2_set_ring_funcs(adev); @@ -101,10 +101,10 @@ static int uvd_v4_2_early_init(void *handle) return 0; } -static int uvd_v4_2_sw_init(void *handle) +static int uvd_v4_2_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; /* UVD TRAP */ @@ -130,10 +130,10 @@ static int uvd_v4_2_sw_init(void *handle) return r; } -static int uvd_v4_2_sw_fini(void *handle) +static int uvd_v4_2_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_uvd_suspend(adev); if (r) @@ -151,9 +151,9 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, * * Initialize the hardware, boot up the VCPU and do some testing */ -static int uvd_v4_2_hw_init(void *handle) +static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -206,9 +206,9 @@ static int uvd_v4_2_hw_init(void *handle) * * Stop the UVD block, mark ring as not ready any more */ -static int uvd_v4_2_hw_fini(void *handle) +static int uvd_v4_2_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -218,17 +218,17 @@ static int uvd_v4_2_hw_fini(void *handle) return 0; } -static int uvd_v4_2_prepare_suspend(void *handle) +static int uvd_v4_2_prepare_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return amdgpu_uvd_prepare_suspend(adev); } -static int uvd_v4_2_suspend(void *handle) +static int uvd_v4_2_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ -254,23 +254,22 @@ static int uvd_v4_2_suspend(void *handle) AMD_CG_STATE_GATE); } - r = uvd_v4_2_hw_fini(adev); + r = 
uvd_v4_2_hw_fini(ip_block); if (r) return r; return amdgpu_uvd_suspend(adev); } -static int uvd_v4_2_resume(void *handle) +static int uvd_v4_2_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); + r = amdgpu_uvd_resume(ip_block->adev); if (r) return r; - return uvd_v4_2_hw_init(adev); + return uvd_v4_2_hw_init(ip_block); } /** @@ -666,10 +665,10 @@ static bool uvd_v4_2_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } -static int uvd_v4_2_wait_for_idle(void *handle) +static int uvd_v4_2_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK)) @@ -678,9 +677,9 @@ static int uvd_v4_2_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int uvd_v4_2_soft_reset(void *handle) +static int uvd_v4_2_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index efd903c21d48e..d84b49064138d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -88,9 +88,9 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } -static int uvd_v5_0_early_init(void *handle) +static int uvd_v5_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->uvd.num_uvd_inst = 1; uvd_v5_0_set_ring_funcs(adev); @@ -99,10 +99,10 @@ static int uvd_v5_0_early_init(void *handle) return 0; } -static int uvd_v5_0_sw_init(void *handle) +static int uvd_v5_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; /* UVD TRAP */ @@ -128,10 +128,10 @@ static int uvd_v5_0_sw_init(void *handle) return r; } -static int uvd_v5_0_sw_fini(void *handle) +static int uvd_v5_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_uvd_suspend(adev); if (r) @@ -147,9 +147,9 @@ static int uvd_v5_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int uvd_v5_0_hw_init(void *handle) +static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -204,9 +204,9 @@ static int uvd_v5_0_hw_init(void *handle) * * Stop the UVD block, mark ring as not ready any more */ -static int uvd_v5_0_hw_fini(void *handle) +static int uvd_v5_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -216,17 +216,17 @@ static int uvd_v5_0_hw_fini(void *handle) return 0; } -static int uvd_v5_0_prepare_suspend(void *handle) +static int uvd_v5_0_prepare_suspend(struct amdgpu_ip_block *ip_block) { 
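As in the uvd_v4_2 and uvd_v5_0 hunks around this point, the suspend/resume pair no longer unwraps the handle itself: suspend forwards the ip_block straight into hw_fini() and only dereferences adev for the core amdgpu_uvd_suspend() call, and resume is the mirror image. A sketch of that delegation, with hypothetical demo_ names in place of the real entry points:

struct amdgpu_device { int dummy; };
struct amdgpu_ip_block { struct amdgpu_device *adev; };

static int demo_hw_fini(struct amdgpu_ip_block *ip_block) { (void)ip_block; return 0; }
static int demo_hw_init(struct amdgpu_ip_block *ip_block) { (void)ip_block; return 0; }
static int demo_core_suspend(struct amdgpu_device *adev) { (void)adev; return 0; }
static int demo_core_resume(struct amdgpu_device *adev) { (void)adev; return 0; }

static int demo_suspend(struct amdgpu_ip_block *ip_block)
{
	int r = demo_hw_fini(ip_block);	/* pass the block through unchanged */

	if (r)
		return r;
	return demo_core_suspend(ip_block->adev);	/* deref only where needed */
}

static int demo_resume(struct amdgpu_ip_block *ip_block)
{
	int r = demo_core_resume(ip_block->adev);

	if (r)
		return r;
	return demo_hw_init(ip_block);	/* mirror order of suspend */
}
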
- struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return amdgpu_uvd_prepare_suspend(adev); } -static int uvd_v5_0_suspend(void *handle) +static int uvd_v5_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ -252,23 +252,22 @@ static int uvd_v5_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = uvd_v5_0_hw_fini(adev); + r = uvd_v5_0_hw_fini(ip_block); if (r) return r; return amdgpu_uvd_suspend(adev); } -static int uvd_v5_0_resume(void *handle) +static int uvd_v5_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); + r = amdgpu_uvd_resume(ip_block->adev); if (r) return r; - return uvd_v5_0_hw_init(adev); + return uvd_v5_0_hw_init(ip_block); } /** @@ -588,10 +587,10 @@ static bool uvd_v5_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } -static int uvd_v5_0_wait_for_idle(void *handle) +static int uvd_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK)) @@ -600,9 +599,9 @@ static int uvd_v5_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int uvd_v5_0_soft_reset(void *handle) +static int uvd_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uvd_v5_0_stop(adev); @@ -796,10 +795,15 @@ static int uvd_v5_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE); + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); + if (!ip_block) + return -EINVAL; if (enable) { /* wait for STATUS to clear */ - if (uvd_v5_0_wait_for_idle(handle)) + if (uvd_v5_0_wait_for_idle(ip_block)) return -EBUSY; uvd_v5_0_enable_clock_gating(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 495de50684554..d14b1769f74ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -354,9 +354,9 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) return r; } -static int uvd_v6_0_early_init(void *handle) +static int uvd_v6_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->uvd.num_uvd_inst = 1; if (!(adev->flags & AMD_IS_APU) && @@ -375,11 +375,11 @@ static int uvd_v6_0_early_init(void *handle) return 0; } -static int uvd_v6_0_sw_init(void *handle) +static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* UVD TRAP */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); @@ -435,10 +435,10 @@ static int uvd_v6_0_sw_init(void *handle) return r; } -static int uvd_v6_0_sw_fini(void *handle) +static int uvd_v6_0_sw_fini(struct amdgpu_ip_block 
*ip_block) { int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_uvd_suspend(adev); if (r) @@ -459,9 +459,9 @@ static int uvd_v6_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int uvd_v6_0_hw_init(void *handle) +static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int i, r; @@ -528,9 +528,9 @@ static int uvd_v6_0_hw_init(void *handle) * * Stop the UVD block, mark ring as not ready any more */ -static int uvd_v6_0_hw_fini(void *handle) +static int uvd_v6_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -540,17 +540,17 @@ static int uvd_v6_0_hw_fini(void *handle) return 0; } -static int uvd_v6_0_prepare_suspend(void *handle) +static int uvd_v6_0_prepare_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return amdgpu_uvd_prepare_suspend(adev); } -static int uvd_v6_0_suspend(void *handle) +static int uvd_v6_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ -576,23 +576,22 @@ static int uvd_v6_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = uvd_v6_0_hw_fini(adev); + r = uvd_v6_0_hw_fini(ip_block); if (r) return r; return amdgpu_uvd_suspend(adev); } -static int uvd_v6_0_resume(void *handle) +static int uvd_v6_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); + r = amdgpu_uvd_resume(ip_block->adev); if (r) return r; - return uvd_v6_0_hw_init(adev); + return uvd_v6_0_hw_init(ip_block); } /** @@ -1151,22 +1150,22 @@ static bool uvd_v6_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } -static int uvd_v6_0_wait_for_idle(void *handle) +static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (uvd_v6_0_is_idle(handle)) + if (uvd_v6_0_is_idle(adev)) return 0; } return -ETIMEDOUT; } #define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd -static bool uvd_v6_0_check_soft_reset(void *handle) +static bool uvd_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1184,9 +1183,9 @@ static bool uvd_v6_0_check_soft_reset(void *handle) } } -static int uvd_v6_0_pre_soft_reset(void *handle) +static int uvd_v6_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->uvd.inst->srbm_soft_reset) return 0; @@ -1195,9 +1194,9 @@ static int uvd_v6_0_pre_soft_reset(void *handle) return 0; } -static int uvd_v6_0_soft_reset(void *handle) +static int uvd_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { 
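set_clockgating_state keeps its void *handle signature in this series, so the uvd_v5_0 hunk above and the uvd_v6_0 hunk below have it resolve its own IP block with amdgpu_device_ip_get_ip_block() before calling the converted wait_for_idle(), and return -EINVAL if the lookup fails. A stub-typed sketch of that lookup-then-call shape (the demo_ names are illustrative, not the driver's):

#include <errno.h>
#include <stddef.h>

struct amdgpu_device { int busy; struct amdgpu_ip_block *blocks; };
struct amdgpu_ip_block { struct amdgpu_device *adev; };

/* Stand-in for amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD). */
static struct amdgpu_ip_block *demo_get_ip_block(struct amdgpu_device *adev)
{
	return adev->blocks;	/* may be NULL if the block is absent */
}

static int demo_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	return ip_block->adev->busy ? -ETIMEDOUT : 0;
}

static int demo_set_clockgating_state(void *handle, int gate)
{
	struct amdgpu_device *adev = handle;
	struct amdgpu_ip_block *ip_block = demo_get_ip_block(adev);

	if (!ip_block)
		return -EINVAL;
	/* only gate clocks once the engine has drained */
	if (gate && demo_wait_for_idle(ip_block))
		return -EBUSY;
	return 0;
}
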
- struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset; if (!adev->uvd.inst->srbm_soft_reset) @@ -1226,9 +1225,9 @@ static int uvd_v6_0_soft_reset(void *handle) return 0; } -static int uvd_v6_0_post_soft_reset(void *handle) +static int uvd_v6_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->uvd.inst->srbm_soft_reset) return 0; @@ -1455,11 +1454,16 @@ static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ip_block *ip_block; bool enable = (state == AMD_CG_STATE_GATE); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); + if (!ip_block) + return -EINVAL; + if (enable) { /* wait for STATUS to clear */ - if (uvd_v6_0_wait_for_idle(handle)) + if (uvd_v6_0_wait_for_idle(ip_block)) return -EBUSY; uvd_v6_0_enable_clock_gating(adev, true); /* enable HW gates because UVD is idle */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 6068b784dc693..52ce3ac38215c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -361,9 +361,9 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) return r; } -static int uvd_v7_0_early_init(void *handle) +static int uvd_v7_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->asic_type == CHIP_VEGA20) { u32 harvest; @@ -395,12 +395,12 @@ static int uvd_v7_0_early_init(void *handle) return 0; } -static int uvd_v7_0_sw_init(void *handle) +static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, j, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->uvd.num_uvd_inst; j++) { if (adev->uvd.harvest_config & (1 << j)) @@ -487,10 +487,10 @@ static int uvd_v7_0_sw_init(void *handle) return r; } -static int uvd_v7_0_sw_fini(void *handle) +static int uvd_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) { int i, j, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_virt_free_mm_table(adev); @@ -514,9 +514,9 @@ static int uvd_v7_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int uvd_v7_0_hw_init(void *handle) +static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; uint32_t tmp; int i, j, r; @@ -592,9 +592,9 @@ static int uvd_v7_0_hw_init(void *handle) * * Stop the UVD block, mark ring as not ready any more */ -static int uvd_v7_0_hw_fini(void *handle) +static int uvd_v7_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -608,17 +608,17 @@ static int uvd_v7_0_hw_fini(void *handle) return 0; } -static int uvd_v7_0_prepare_suspend(void *handle) +static int uvd_v7_0_prepare_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct 
amdgpu_device *adev = ip_block->adev; return amdgpu_uvd_prepare_suspend(adev); } -static int uvd_v7_0_suspend(void *handle) +static int uvd_v7_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ -644,23 +644,22 @@ static int uvd_v7_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = uvd_v7_0_hw_fini(adev); + r = uvd_v7_0_hw_fini(ip_block); if (r) return r; return amdgpu_uvd_suspend(adev); } -static int uvd_v7_0_resume(void *handle) +static int uvd_v7_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_uvd_resume(adev); + r = amdgpu_uvd_resume(ip_block->adev); if (r) return r; - return uvd_v7_0_hw_init(adev); + return uvd_v7_0_hw_init(ip_block); } /** @@ -1471,10 +1470,10 @@ static bool uvd_v7_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } -static int uvd_v7_0_wait_for_idle(void *handle) +static int uvd_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { if (uvd_v7_0_is_idle(handle)) @@ -1484,9 +1483,9 @@ static int uvd_v7_0_wait_for_idle(void *handle) } #define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd -static bool uvd_v7_0_check_soft_reset(void *handle) +static bool uvd_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1506,9 +1505,9 @@ static bool uvd_v7_0_check_soft_reset(void *handle) } } -static int uvd_v7_0_pre_soft_reset(void *handle) +static int uvd_v7_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; @@ -1517,9 +1516,9 @@ static int uvd_v7_0_pre_soft_reset(void *handle) return 0; } -static int uvd_v7_0_soft_reset(void *handle) +static int uvd_v7_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset; if (!adev->uvd.inst[ring->me].srbm_soft_reset) @@ -1548,9 +1547,9 @@ static int uvd_v7_0_soft_reset(void *handle) return 0; } -static int uvd_v7_0_post_soft_reset(void *handle) +static int uvd_v7_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; @@ -1728,6 +1727,11 @@ static int uvd_v7_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE); + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); + if (!ip_block) + return -EINVAL; uvd_v7_0_set_bypass_mode(adev, enable); @@ -1739,7 +1743,7 @@ static int uvd_v7_0_set_clockgating_state(void *handle, uvd_v7_0_set_sw_clock_gating(adev); } else { /* wait for STATUS to clear */ - if (uvd_v7_0_wait_for_idle(handle)) + if (uvd_v7_0_wait_for_idle(ip_block)) return -EBUSY; /* enable HW gates because UVD is idle */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 66fada199bda2..a4531000ec0bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -208,13 +208,13 @@ static bool vce_v2_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK); } -static int vce_v2_0_wait_for_idle(void *handle) +static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; unsigned i; for (i = 0; i < adev->usec_timeout; i++) { - if (vce_v2_0_is_idle(handle)) + if (vce_v2_0_is_idle(adev)) return 0; } return -ETIMEDOUT; @@ -274,15 +274,21 @@ static int vce_v2_0_start(struct amdgpu_device *adev) static int vce_v2_0_stop(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; int i; int status; + if (vce_v2_0_lmi_clean(adev)) { DRM_INFO("vce is not idle \n"); return 0; } - if (vce_v2_0_wait_for_idle(adev)) { + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN); + if (!ip_block) + return -EINVAL; + + if (vce_v2_0_wait_for_idle(ip_block)) { DRM_INFO("VCE is busy, Can't set clock gating"); return 0; } @@ -398,9 +404,9 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable, } } -static int vce_v2_0_early_init(void *handle) +static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->vce.num_rings = 2; @@ -410,11 +416,11 @@ static int vce_v2_0_early_init(void *handle) return 0; } -static int vce_v2_0_sw_init(void *handle) +static int vce_v2_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* VCE */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq); @@ -444,10 +450,10 @@ static int vce_v2_0_sw_init(void *handle) return r; } -static int vce_v2_0_sw_fini(void *handle) +static int vce_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_vce_suspend(adev); if (r) @@ -456,10 +462,10 @@ static int vce_v2_0_sw_fini(void *handle) return amdgpu_vce_sw_fini(adev); } -static int vce_v2_0_hw_init(void *handle) +static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_asic_set_vce_clocks(adev, 10000, 10000); vce_v2_0_enable_mgcg(adev, true, false); @@ -475,19 +481,17 @@ static int vce_v2_0_hw_init(void *handle) return 0; } -static int vce_v2_0_hw_fini(void *handle) +static int vce_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - cancel_delayed_work_sync(&adev->vce.idle_work); + cancel_delayed_work_sync(&ip_block->adev->vce.idle_work); return 0; } -static int vce_v2_0_suspend(void *handle) +static int vce_v2_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* @@ -513,28 +517,27 @@ static int vce_v2_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = vce_v2_0_hw_fini(adev); + r = vce_v2_0_hw_fini(ip_block); if (r) return r; return amdgpu_vce_suspend(adev); 
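One subtlety visible in the vce_v2_0_wait_for_idle hunk above (and earlier in uvd_v6_0): is_idle() still takes void *handle, so converted callers switch their argument from handle to adev — which is exactly what the void pointer always carried, leaving behaviour unchanged. A minimal stub-typed sketch of that interop:

#include <errno.h>

struct amdgpu_device { int busy; };
struct amdgpu_ip_block { struct amdgpu_device *adev; };

/* is_idle() keeps the legacy void * signature for now. */
static int demo_is_idle(void *handle)
{
	struct amdgpu_device *adev = handle;

	return !adev->busy;
}

static int demo_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < 10; i++)
		if (demo_is_idle(adev))	/* pass adev, not the vanished handle */
			return 0;
	return -ETIMEDOUT;
}
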
} -static int vce_v2_0_resume(void *handle) +static int vce_v2_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vce_resume(adev); + r = amdgpu_vce_resume(ip_block->adev); if (r) return r; - return vce_v2_0_hw_init(adev); + return vce_v2_0_hw_init(ip_block); } -static int vce_v2_0_soft_reset(void *handle) +static int vce_v2_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1); mdelay(5); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 4bfba2931b088..9f9a9d89bcdc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -64,7 +64,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int vce_v3_0_wait_for_idle(void *handle); +static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block); static int vce_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state); /** @@ -396,9 +396,9 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) } } -static int vce_v3_0_early_init(void *handle) +static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev); @@ -415,9 +415,9 @@ static int vce_v3_0_early_init(void *handle) return 0; } -static int vce_v3_0_sw_init(void *handle) +static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int r, i; @@ -453,10 +453,10 @@ static int vce_v3_0_sw_init(void *handle) return r; } -static int vce_v3_0_sw_fini(void *handle) +static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_vce_suspend(adev); if (r) @@ -465,10 +465,10 @@ static int vce_v3_0_sw_fini(void *handle) return amdgpu_vce_sw_fini(adev); } -static int vce_v3_0_hw_init(void *handle) +static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vce_v3_0_override_vce_clock_gating(adev, true); @@ -485,14 +485,14 @@ static int vce_v3_0_hw_init(void *handle) return 0; } -static int vce_v3_0_hw_fini(void *handle) +static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vce.idle_work); - r = vce_v3_0_wait_for_idle(handle); + r = vce_v3_0_wait_for_idle(ip_block); if (r) return r; @@ -500,10 +500,10 @@ static int vce_v3_0_hw_fini(void *handle) return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); } -static int vce_v3_0_suspend(void *handle) +static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * Proper cleanups before halting the HW engine: @@ 
-528,23 +528,22 @@ static int vce_v3_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = vce_v3_0_hw_fini(adev); + r = vce_v3_0_hw_fini(ip_block); if (r) return r; return amdgpu_vce_suspend(adev); } -static int vce_v3_0_resume(void *handle) +static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vce_resume(adev); + r = amdgpu_vce_resume(ip_block->adev); if (r) return r; - return vce_v3_0_hw_init(adev); + return vce_v3_0_hw_init(ip_block); } static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) @@ -609,13 +608,13 @@ static bool vce_v3_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS2) & mask); } -static int vce_v3_0_wait_for_idle(void *handle) +static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) - if (vce_v3_0_is_idle(handle)) + if (vce_v3_0_is_idle(adev)) return 0; return -ETIMEDOUT; @@ -627,9 +626,9 @@ static int vce_v3_0_wait_for_idle(void *handle) #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) -static bool vce_v3_0_check_soft_reset(void *handle) +static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; /* According to VCE team , we should use VCE_STATUS instead @@ -668,9 +667,9 @@ static bool vce_v3_0_check_soft_reset(void *handle) } } -static int vce_v3_0_soft_reset(void *handle) +static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset; if (!adev->vce.srbm_soft_reset) @@ -699,29 +698,29 @@ static int vce_v3_0_soft_reset(void *handle) return 0; } -static int vce_v3_0_pre_soft_reset(void *handle) +static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->vce.srbm_soft_reset) return 0; mdelay(5); - return vce_v3_0_suspend(adev); + return vce_v3_0_suspend(ip_block); } -static int vce_v3_0_post_soft_reset(void *handle) +static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->vce.srbm_soft_reset) return 0; mdelay(5); - return vce_v3_0_resume(adev); + return vce_v3_0_resume(ip_block); } static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 0748bf44c8808..f4d2650e6b7a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -407,9 +407,9 @@ static int vce_v4_0_stop(struct amdgpu_device *adev) return 0; } -static int vce_v4_0_early_init(void *handle) +static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ adev->vce.num_rings = 1; @@ -422,9 +422,9 @@ static int vce_v4_0_early_init(void *handle) return 0; } -static int vce_v4_0_sw_init(void *handle) 
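The vce_v3_0 hunks just above also show how the soft-reset bracket reads after the conversion: pre_soft_reset settles the engine (mdelay(5)) and then simply suspends the block, post_soft_reset settles and resumes it, and both forward the same ip_block instead of rebuilding adev from a handle. A stub sketch of that bracket (demo_ names and the no-op settle are placeholders):

struct amdgpu_device { int srbm_soft_reset; };
struct amdgpu_ip_block { struct amdgpu_device *adev; };

static int demo_suspend(struct amdgpu_ip_block *ip_block) { (void)ip_block; return 0; }
static int demo_resume(struct amdgpu_ip_block *ip_block) { (void)ip_block; return 0; }
static void demo_settle(void) { /* mdelay(5) in the real driver */ }

static int demo_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
	if (!ip_block->adev->srbm_soft_reset)
		return 0;	/* no reset pending, nothing to quiesce */
	demo_settle();
	return demo_suspend(ip_block);
}

static int demo_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
	if (!ip_block->adev->srbm_soft_reset)
		return 0;
	demo_settle();
	return demo_resume(ip_block);
}
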
+static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; unsigned size; @@ -493,10 +493,10 @@ static int vce_v4_0_sw_init(void *handle) return r; } -static int vce_v4_0_sw_fini(void *handle) +static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* free MM table */ amdgpu_virt_free_mm_table(adev); @@ -513,10 +513,10 @@ static int vce_v4_0_sw_fini(void *handle) return amdgpu_vce_sw_fini(adev); } -static int vce_v4_0_hw_init(void *handle) +static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block) { int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) r = vce_v4_0_sriov_start(adev); @@ -536,14 +536,14 @@ static int vce_v4_0_hw_init(void *handle) return 0; } -static int vce_v4_0_hw_fini(void *handle) +static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vce.idle_work); if (!amdgpu_sriov_vf(adev)) { - /* vce_v4_0_wait_for_idle(handle); */ + /* vce_v4_0_wait_for_idle(ip_block); */ vce_v4_0_stop(adev); } else { /* full access mode, so don't touch any VCE register */ @@ -553,9 +553,9 @@ static int vce_v4_0_hw_fini(void *handle) return 0; } -static int vce_v4_0_suspend(void *handle) +static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r, idx; if (adev->vce.vcpu_bo == NULL) @@ -594,16 +594,16 @@ static int vce_v4_0_suspend(void *handle) AMD_CG_STATE_GATE); } - r = vce_v4_0_hw_fini(adev); + r = vce_v4_0_hw_fini(ip_block); if (r) return r; return amdgpu_vce_suspend(adev); } -static int vce_v4_0_resume(void *handle) +static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r, idx; if (adev->vce.vcpu_bo == NULL) @@ -624,7 +624,7 @@ static int vce_v4_0_resume(void *handle) return r; } - return vce_v4_0_hw_init(adev); + return vce_v4_0_hw_init(ip_block); } static void vce_v4_0_mc_resume(struct amdgpu_device *adev) @@ -703,10 +703,10 @@ static bool vce_v4_0_is_idle(void *handle) return !(RREG32(mmSRBM_STATUS2) & mask); } -static int vce_v4_0_wait_for_idle(void *handle) +static int vce_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) if (vce_v4_0_is_idle(handle)) @@ -721,9 +721,9 @@ static int vce_v4_0_wait_for_idle(void *handle) #define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) -static bool vce_v4_0_check_soft_reset(void *handle) +static bool vce_v4_0_check_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset = 0; /* According to VCE team , we should use VCE_STATUS instead @@ -762,9 +762,9 @@ static bool vce_v4_0_check_soft_reset(void *handle) } } -static int vce_v4_0_soft_reset(void *handle) +static 
int vce_v4_0_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 srbm_soft_reset; if (!adev->vce.srbm_soft_reset) @@ -793,9 +793,9 @@ static int vce_v4_0_soft_reset(void *handle) return 0; } -static int vce_v4_0_pre_soft_reset(void *handle) +static int vce_v4_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->vce.srbm_soft_reset) return 0; @@ -806,9 +806,9 @@ static int vce_v4_0_pre_soft_reset(void *handle) } -static int vce_v4_0_post_soft_reset(void *handle) +static int vce_v4_0_post_soft_reset(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->vce.srbm_soft_reset) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a280b9fecb773..add33f1b6aa24 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -45,6 +45,42 @@ #define mmUVD_REG_XX_MASK_1_0 0x05ac #define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -64,9 +100,9 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v1_0_early_init(void *handle) +static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->vcn.num_enc_rings = 2; @@ -74,7 +110,7 @@ static 
int vcn_v1_0_early_init(void *handle) vcn_v1_0_set_enc_ring_funcs(adev); vcn_v1_0_set_irq_funcs(adev); - jpeg_v1_0_early_init(handle); + jpeg_v1_0_early_init(ip_block); return amdgpu_vcn_early_init(adev); } @@ -86,11 +122,13 @@ static int vcn_v1_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v1_0_sw_init(void *handle) +static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t *ptr; + struct amdgpu_device *adev = ip_block->adev; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -159,8 +197,16 @@ static int vcn_v1_0_sw_init(void *handle) amdgpu_vcn_fwlog_init(adev->vcn.inst); } - r = jpeg_v1_0_sw_init(handle); + r = jpeg_v1_0_sw_init(ip_block); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return r; } @@ -171,19 +217,21 @@ static int vcn_v1_0_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v1_0_sw_fini(void *handle) +static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_vcn_suspend(adev); if (r) return r; - jpeg_v1_0_sw_fini(handle); + jpeg_v1_0_sw_fini(ip_block); r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -194,9 +242,9 @@ static int vcn_v1_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v1_0_hw_init(void *handle) +static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; @@ -224,9 +272,9 @@ static int vcn_v1_0_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v1_0_hw_fini(void *handle) +static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -246,10 +294,10 @@ static int vcn_v1_0_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v1_0_suspend(void *handle) +static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool idle_work_unexecuted; idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -258,7 +306,7 @@ static int vcn_v1_0_suspend(void *handle) amdgpu_dpm_enable_uvd(adev, false); } - r = vcn_v1_0_hw_fini(adev); + r = vcn_v1_0_hw_fini(ip_block); if (r) return r; @@ -274,16 +322,15 @@ static int vcn_v1_0_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v1_0_resume(void *handle) +static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v1_0_hw_init(adev); + r = vcn_v1_0_hw_init(ip_block); return r; } @@ -1336,9 +1383,9 
@@ static bool vcn_v1_0_is_idle(void *handle) return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); } -static int vcn_v1_0_wait_for_idle(void *handle) +static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, @@ -1873,10 +1920,70 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + mod_delayed_work(system_wq, &ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v1_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1895,8 +2002,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v1_0_dump_ip_state, + .print_ip_state = vcn_v1_0_print_ip_state, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d3d096909a7f4..c104c47301c7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -53,6 +53,42 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + 
SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); @@ -69,9 +105,9 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v2_0_early_init(void *handle) +static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) adev->vcn.num_enc_rings = 1; @@ -92,11 +128,13 @@ static int vcn_v2_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v2_0_sw_init(void *handle) +static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t *ptr; + struct amdgpu_device *adev = ip_block->adev; volatile struct amdgpu_fw_shared *fw_shared; /* VCN DEC TRAP */ @@ -184,6 +222,15 @@ static int vcn_v2_0_sw_init(void *handle) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -194,10 +241,10 @@ static int vcn_v2_0_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v2_0_sw_fini(void *handle) +static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r, idx; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -213,6 +260,8 
@@ static int vcn_v2_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -223,9 +272,9 @@ static int vcn_v2_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v2_0_hw_init(void *handle) +static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; @@ -260,9 +309,9 @@ static int vcn_v2_0_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v2_0_hw_fini(void *handle) +static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -281,16 +330,15 @@ static int vcn_v2_0_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v2_0_suspend(void *handle) +static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v2_0_hw_fini(adev); + r = vcn_v2_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -302,16 +350,15 @@ static int vcn_v2_0_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v2_0_resume(void *handle) +static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v2_0_hw_init(adev); + r = vcn_v2_0_hw_init(ip_block); return r; } @@ -1277,9 +1324,9 @@ static bool vcn_v2_0_is_idle(void *handle) return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); } -static int vcn_v2_0_wait_for_idle(void *handle) +static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, @@ -1985,6 +2032,66 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + + if 
(!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2003,8 +2110,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_0_dump_ip_state, + .print_ip_state = vcn_v2_0_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 96f60c3031610..e76e1168d6f8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,43 @@ #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); @@ -78,9 +115,9 @@ static int amdgpu_ih_clientid_vcns[] = { * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v2_5_early_init(void *handle) +static int 
vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = 2; @@ -118,11 +155,13 @@ static int vcn_v2_5_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v2_5_sw_init(void *handle) +static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, j, r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t *ptr; + struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { if (adev->vcn.harvest_config & (1 << j)) @@ -241,6 +280,15 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -251,10 +299,10 @@ static int vcn_v2_5_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v2_5_sw_fini(void *handle) +static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) { int i, r, idx; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; volatile struct amdgpu_fw_shared *fw_shared; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -277,6 +325,8 @@ static int vcn_v2_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -287,9 +337,9 @@ static int vcn_v2_5_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v2_5_hw_init(void *handle) +static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, j, r = 0; @@ -335,9 +385,9 @@ static int vcn_v2_5_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v2_5_hw_fini(void *handle) +static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -365,16 +415,15 @@ static int vcn_v2_5_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v2_5_suspend(void *handle) +static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v2_5_hw_fini(adev); + r = vcn_v2_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -386,16 +435,15 @@ static int vcn_v2_5_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v2_5_resume(void *handle) +static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v2_5_hw_init(adev); + r = vcn_v2_5_hw_init(ip_block); return r; } @@ -1736,9 +1784,9 @@ static bool vcn_v2_5_is_idle(void *handle) return ret; } -static int vcn_v2_5_wait_for_idle(void *handle) +static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block 
*ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1876,6 +1924,66 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_5_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -1894,8 +2002,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1916,8 +2024,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c463..51ea9a83204f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + 
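/* GPCOM mailbox registers: command/data handshake between driver and VCPU firmware */ +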
SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -85,9 +121,9 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v3_0_early_init(void *handle) +static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; @@ -121,12 +157,14 @@ static int vcn_v3_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v3_0_sw_init(void *handle) +static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; + struct amdgpu_device *adev = ip_block->adev; r = amdgpu_vcn_sw_init(adev); if (r) @@ -246,6 +284,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -256,9 +303,9 @@ static int vcn_v3_0_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v3_0_sw_fini(void *handle) +static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r, idx; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -284,6 +331,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -294,9 +342,9 @@ static int vcn_v3_0_sw_fini(void *handle) * * Initialize the hardware, boot up 
the VCPU and do some testing */ -static int vcn_v3_0_hw_init(void *handle) +static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, j, r; @@ -369,9 +417,9 @@ static int vcn_v3_0_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v3_0_hw_fini(void *handle) +static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -399,16 +447,15 @@ static int vcn_v3_0_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v3_0_suspend(void *handle) +static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v3_0_hw_fini(adev); + r = vcn_v3_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -420,16 +467,15 @@ static int vcn_v3_0_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v3_0_resume(void *handle) +static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v3_0_hw_init(adev); + r = vcn_v3_0_hw_init(ip_block); return r; } @@ -2068,9 +2114,9 @@ static bool vcn_v3_0_is_idle(void *handle) return ret; } -static int vcn_v3_0_wait_for_idle(void *handle) +static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -2203,6 +2249,67 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + bool is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v3_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = 
(adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,8 +2328,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 776c539bfddac..e33cc611f2154 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -75,9 +111,9 @@ static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v4_0_early_init(void *handle) +static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; if (amdgpu_sriov_vf(adev)) { @@ -132,11 +168,13 @@ static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) * * Load firmware and sw initialization */ -static int vcn_v4_0_sw_init(void *handle) +static int vcn_v4_0_sw_init(struct amdgpu_ip_block 
*ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -200,6 +238,15 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -210,9 +257,9 @@ static int vcn_v4_0_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v4_0_sw_fini(void *handle) +static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r, idx; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -239,6 +286,8 @@ static int vcn_v4_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -249,9 +298,9 @@ static int vcn_v4_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v4_0_hw_init(void *handle) +static int vcn_v4_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; @@ -296,9 +345,9 @@ static int vcn_v4_0_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v4_0_hw_fini(void *handle) +static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -327,16 +376,15 @@ static int vcn_v4_0_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v4_0_suspend(void *handle) +static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v4_0_hw_fini(adev); + r = vcn_v4_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -348,16 +396,15 @@ static int vcn_v4_0_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v4_0_resume(void *handle) +static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v4_0_hw_init(adev); + r = vcn_v4_0_hw_init(ip_block); return r; } @@ -1930,9 +1977,9 @@ static bool vcn_v4_0_is_idle(void *handle) * * Wait for VCN block idle */ -static int vcn_v4_0_wait_for_idle(void *handle) +static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -2109,6 +2156,67 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t inst_off, is_powered; + + 
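/* no snapshot exists when the sw_init allocation failed */ +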
if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2127,8 +2235,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_dump_ip_state, + .print_ip_state = vcn_v4_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b628..f02699a92e501 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,42 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, 
regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + #define NORMALIZE_VCN_REG_OFFSET(offset) \ (offset & 0x1FFFF) @@ -66,9 +102,9 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, * * Set ring and irq function pointers */ -static int vcn_v4_0_3_early_init(void *handle) +static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -87,11 +123,13 @@ static int vcn_v4_0_3_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v4_0_3_sw_init(void *handle) +static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r, vcn_inst; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -159,6 +197,15 @@ static int vcn_v4_0_3_sw_init(void *handle) } } + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -169,9 +216,9 @@ static int vcn_v4_0_3_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v4_0_3_sw_fini(void *handle) +static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r, idx; if (drm_dev_enter(&adev->ddev, &idx)) { @@ -194,6 +241,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -204,9 +253,9 @@ static int vcn_v4_0_3_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v4_0_3_hw_init(void *handle) +static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r, vcn_inst; @@ -263,9 +312,9 @@ static int vcn_v4_0_3_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v4_0_3_hw_fini(void *handle) +static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -282,16 +331,15 @@ static int vcn_v4_0_3_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v4_0_3_suspend(void *handle) +static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = vcn_v4_0_3_hw_fini(adev); + r = vcn_v4_0_3_hw_fini(ip_block); if (r) return r; - r = 
amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -303,16 +351,15 @@ static int vcn_v4_0_3_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v4_0_3_resume(void *handle) +static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v4_0_3_hw_init(adev); + r = vcn_v4_0_3_hw_init(ip_block); return r; } @@ -1522,9 +1569,9 @@ static bool vcn_v4_0_3_is_idle(void *handle) * * Wait for VCN block idle */ -static int vcn_v4_0_3_wait_for_idle(void *handle) +static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1684,6 +1731,68 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_3_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off, inst_id; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(VCN, i); + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], + inst_id)); + } +} + static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, @@ -1702,8 +1811,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_3_dump_ip_state, + .print_ip_state = vcn_v4_0_3_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 
8d75061f9f384..6d277ee27f463 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -73,9 +109,9 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v4_0_5_early_init(void *handle) +static int vcn_v4_0_5_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -92,11 +128,13 @@ static int vcn_v4_0_5_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v4_0_5_sw_init(void *handle) +static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -178,9 +224,9 @@ static int vcn_v4_0_5_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int 
vcn_v4_0_5_sw_fini(void *handle) +static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r, idx; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -217,9 +265,9 @@ static int vcn_v4_0_5_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v4_0_5_hw_init(void *handle) +static int vcn_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; @@ -247,9 +295,9 @@ static int vcn_v4_0_5_hw_init(void *handle) * * Stop the VCN block, mark ring as not ready any more */ -static int vcn_v4_0_5_hw_fini(void *handle) +static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -276,16 +324,15 @@ static int vcn_v4_0_5_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v4_0_5_suspend(void *handle) +static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v4_0_5_hw_fini(adev); + r = vcn_v4_0_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -297,16 +344,15 @@ static int vcn_v4_0_5_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v4_0_5_resume(void *handle) +static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v4_0_5_hw_init(adev); + r = vcn_v4_0_5_hw_init(ip_block); return r; } @@ -1347,170 +1393,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) -{ - struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; - - /* if VCN0 is harvested, we can't support AV1 */ - if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) - return -EINVAL; - - scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] - [AMDGPU_RING_PRIO_0].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); - return 0; -} - -static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) -{ - struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo_va_mapping *map; - uint32_t *msg, num_buffers; - struct amdgpu_bo *bo; - uint64_t start, end; - unsigned int i; - void *ptr; - int r; - - addr &= AMDGPU_GMC_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, addr, &bo, &map); - if (r) { - DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); - return r; - } - - start = map->start * AMDGPU_GPU_PAGE_SIZE; - end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; - if (addr & 0x7) { - DRM_ERROR("VCN messages must be 8 byte aligned!\n"); - return -EINVAL; - } - - bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); - r = 
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) { - DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); - return r; - } - - r = amdgpu_bo_kmap(bo, &ptr); - if (r) { - DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); - return r; - } - - msg = ptr + addr - start; - - /* Check length */ - if (msg[1] > end - addr) { - r = -EINVAL; - goto out; - } - - if (msg[3] != RDECODE_MSG_CREATE) - goto out; - - num_buffers = msg[2]; - for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { - uint32_t offset, size, *create; - - if (msg[0] != RDECODE_MESSAGE_CREATE) - continue; - - offset = msg[1]; - size = msg[2]; - - if (offset + size > end) { - r = -EINVAL; - goto out; - } - - create = ptr + addr + offset - start; - - /* H264, HEVC and VP9 can run on any instance */ - if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) - continue; - - r = vcn_v4_0_5_limit_sched(p, job); - if (r) - goto out; - } - -out: - amdgpu_bo_kunmap(bo); - return r; -} - -#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) -#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - -#define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - -#define RENCODE_ENCODE_STANDARD_AV1 2 -#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 - -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) -{ - int i; - - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { - if (ib->ptr[i + 1] == id) - return i; - } - return -1; -} - -static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - struct amdgpu_ring *ring = amdgpu_job_ring(job); - struct amdgpu_vcn_decode_buffer *decode_buffer; - uint64_t addr; - uint32_t val; - int idx; - - /* The first instance can decode anything */ - if (!ring->me) - return 0; - - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; - - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - - if (!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_5_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_5_limit_sched(p, job); - } - return 0; -} - static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1518,7 +1400,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, - .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + @@ -1590,9 +1471,9 @@ static bool vcn_v4_0_5_is_idle(void 
*handle) * * Wait for VCN block idle */ -static int vcn_v4_0_5_wait_for_idle(void *handle) +static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1733,6 +1614,67 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_5_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1751,8 +1693,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_5_dump_ip_state, + .print_ip_state = vcn_v4_0_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b9..89e813b7ba5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,40 @@ #include +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, 
regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -58,9 +92,9 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); * Set ring and irq function pointers * Load microcode from filesystem */ -static int vcn_v5_0_0_early_init(void *handle) +static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; @@ -78,11 +112,13 @@ static int vcn_v5_0_0_early_init(void *handle) * * Load firmware and sw initialization */ -static int vcn_v5_0_0_sw_init(void *handle) +static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +173,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -147,9 +191,9 @@ static int vcn_v5_0_0_sw_init(void *handle) * * VCN suspend and free up sw allocation */ -static int vcn_v5_0_0_sw_fini(void *handle) +static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, r, idx; if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -173,6 +217,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -183,9 +229,9 @@ static int vcn_v5_0_0_sw_fini(void *handle) * * Initialize the hardware, boot up the VCPU and do some testing */ -static int vcn_v5_0_0_hw_init(void *handle) +static int vcn_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r; @@ -213,9 +259,9 @@ static int vcn_v5_0_0_hw_init(void *handle) * * Stop the VCN block, mark ring as not 
ready any more */ -static int vcn_v5_0_0_hw_fini(void *handle) +static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i; cancel_delayed_work_sync(&adev->vcn.idle_work); @@ -242,16 +288,15 @@ static int vcn_v5_0_0_hw_fini(void *handle) * * HW fini and suspend VCN block */ -static int vcn_v5_0_0_suspend(void *handle) +static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = vcn_v5_0_0_hw_fini(adev); + r = vcn_v5_0_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(ip_block->adev); return r; } @@ -263,16 +308,15 @@ static int vcn_v5_0_0_suspend(void *handle) * * Resume firmware and hw init VCN block */ -static int vcn_v5_0_0_resume(void *handle) +static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(ip_block->adev); if (r) return r; - r = vcn_v5_0_0_hw_init(adev); + r = vcn_v5_0_0_hw_init(ip_block); return r; } @@ -1154,9 +1198,9 @@ static bool vcn_v5_0_0_is_idle(void *handle) * * Wait for VCN block idle */ -static int vcn_v5_0_0_wait_for_idle(void *handle) +static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1297,6 +1341,66 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, 
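Every VCN variant above follows the same dump scheme: sw_init kcalloc()s one flat uint32_t buffer holding reg_count slots per instance, dump_ip_state() snapshots the registers into it, and print_ip_state() renders the snapshot through a drm_printer. Below is a minimal standalone sketch of that layout and the power gating, assuming stand-in values: REG_COUNT and POWER_STATUS_MASK substitute for the per-version ARRAY_SIZE(vcn_reg_list_*) and UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, dump_slot() and inst_is_powered() are illustrative helpers rather than functions from the patch, and harvesting plus GET_INST() remapping are ignored.

#include <stdint.h>
#include <stdbool.h>

#define REG_COUNT 33		/* stand-in: ARRAY_SIZE(vcn_reg_list_*) */
#define POWER_STATUS_MASK 0x3	/* stand-in: UVD_POWER_STATUS__UVD_POWER_STATUS_MASK */

/* Slot of register 'reg' for instance 'inst' in the flat dump buffer. */
static inline uint32_t dump_slot(uint32_t inst, uint32_t reg)
{
	return inst * REG_COUNT + reg;
}

/*
 * Slot 0 of each instance always holds UVD_POWER_STATUS, which stays
 * readable while the block is power gated; a status field value of 1
 * means the instance is gated, so only other values permit reading
 * slots 1..REG_COUNT-1.
 */
static inline bool inst_is_powered(const uint32_t *ip_dump, uint32_t inst)
{
	return (ip_dump[dump_slot(inst, 0)] & POWER_STATUS_MASK) != 1;
}

This is why each dump_ip_state() unconditionally reads only the power status register before gating the rest of the capture, and why the matching print_ip_state() labels instances Active or Inactive off the same test.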
@@ -1315,8 +1419,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, + .print_ip_state = vcn_v5_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index bf68e18e3824b..73de5909f655b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -472,18 +472,18 @@ static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs; } -static int vega10_ih_early_init(void *handle) +static int vega10_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega10_ih_set_interrupt_funcs(adev); vega10_ih_set_self_irq_funcs(adev); return 0; } -static int vega10_ih_sw_init(void *handle) +static int vega10_ih_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0, @@ -525,43 +525,35 @@ static int vega10_ih_sw_init(void *handle) return r; } -static int vega10_ih_sw_fini(void *handle) +static int vega10_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int vega10_ih_hw_init(void *handle) +static int vega10_ih_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vega10_ih_irq_init(adev); + return vega10_ih_irq_init(ip_block->adev); } -static int vega10_ih_hw_fini(void *handle) +static int vega10_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - vega10_ih_irq_disable(adev); + vega10_ih_irq_disable(ip_block->adev); return 0; } -static int vega10_ih_suspend(void *handle) +static int vega10_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vega10_ih_hw_fini(adev); + return vega10_ih_hw_fini(ip_block); } -static int vega10_ih_resume(void *handle) +static int vega10_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vega10_ih_hw_init(adev); + return vega10_ih_hw_init(ip_block); } static bool vega10_ih_is_idle(void *handle) @@ -570,13 +562,13 @@ static bool vega10_ih_is_idle(void *handle) return true; } -static int vega10_ih_wait_for_idle(void *handle) +static int vega10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int vega10_ih_soft_reset(void *handle) +static int vega10_ih_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index ac439f0565e35..a42404a58015d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -526,18 +526,18 @@ static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev) adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs; } -static int vega20_ih_early_init(void 
*handle) +static int vega20_ih_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega20_ih_set_interrupt_funcs(adev); vega20_ih_set_self_irq_funcs(adev); return 0; } -static int vega20_ih_sw_init(void *handle) +static int vega20_ih_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool use_bus_addr = true; int r; @@ -586,19 +586,19 @@ static int vega20_ih_sw_init(void *handle) return r; } -static int vega20_ih_sw_fini(void *handle) +static int vega20_ih_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_irq_fini_sw(adev); return 0; } -static int vega20_ih_hw_init(void *handle) +static int vega20_ih_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = vega20_ih_irq_init(adev); if (r) @@ -607,27 +607,21 @@ static int vega20_ih_hw_init(void *handle) return 0; } -static int vega20_ih_hw_fini(void *handle) +static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - vega20_ih_irq_disable(adev); + vega20_ih_irq_disable(ip_block->adev); return 0; } -static int vega20_ih_suspend(void *handle) +static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vega20_ih_hw_fini(adev); + return vega20_ih_hw_fini(ip_block); } -static int vega20_ih_resume(void *handle) +static int vega20_ih_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vega20_ih_hw_init(adev); + return vega20_ih_hw_init(ip_block); } static bool vega20_ih_is_idle(void *handle) @@ -636,13 +630,13 @@ static bool vega20_ih_is_idle(void *handle) return true; } -static int vega20_ih_wait_for_idle(void *handle) +static int vega20_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return -ETIMEDOUT; } -static int vega20_ih_soft_reset(void *handle) +static int vega20_ih_soft_reset(struct amdgpu_ip_block *ip_block) { /* todo */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index d39c670f62204..4996049dc1995 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1455,9 +1455,9 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = #define CZ_REV_BRISTOL(rev) \ ((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6)) -static int vi_common_early_init(void *handle) +static int vi_common_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) { adev->smc_rreg = &cz_smc_rreg; @@ -1679,9 +1679,9 @@ static int vi_common_early_init(void *handle) return 0; } -static int vi_common_late_init(void *handle) +static int vi_common_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_vi_mailbox_get_irq(adev); @@ -1689,9 +1689,9 @@ static int vi_common_late_init(void *handle) return 0; } -static int vi_common_sw_init(void *handle) +static int vi_common_sw_init(struct 
amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) xgpu_vi_mailbox_add_irq_id(adev); @@ -1699,14 +1699,14 @@ static int vi_common_sw_init(void *handle) return 0; } -static int vi_common_sw_fini(void *handle) +static int vi_common_sw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int vi_common_hw_init(void *handle) +static int vi_common_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* move the golden regs per IP block */ vi_init_golden_registers(adev); @@ -1718,9 +1718,9 @@ static int vi_common_hw_init(void *handle) return 0; } -static int vi_common_hw_fini(void *handle) +static int vi_common_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* enable the doorbell aperture */ vi_enable_doorbell_aperture(adev, false); @@ -1731,18 +1731,14 @@ static int vi_common_hw_fini(void *handle) return 0; } -static int vi_common_suspend(void *handle) +static int vi_common_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vi_common_hw_fini(adev); + return vi_common_hw_fini(ip_block); } -static int vi_common_resume(void *handle) +static int vi_common_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - return vi_common_hw_init(adev); + return vi_common_hw_init(ip_block); } static bool vi_common_is_idle(void *handle) @@ -1750,12 +1746,12 @@ static bool vi_common_is_idle(void *handle) return true; } -static int vi_common_wait_for_idle(void *handle) +static int vi_common_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int vi_common_soft_reset(void *handle) +static int vi_common_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 80ce42aacc0cc..b61f6b838ec2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -246,6 +246,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdkcl/Makefile b/drivers/gpu/drm/amd/amdkcl/Makefile new file mode 100644 index 0000000000000..5d1a6ec853128 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: MIT +amdkcl-y += main.o kcl_common.o + +amdkcl-y += kcl_kernel_params.o +amdkcl-y += dma-buf/dma-resv.o kcl_dma-resv.o + +amdkcl-y += kcl_backlight.o kcl_ioctl.o \ + kcl_kthread.o kcl_io.o kcl_seq_file.o \ + kcl_suspend.o kcl_pci.o kcl_mm.o kcl_memory.o \ + kcl_fence.o kcl_drm_cache.o \ + kcl_drm_fb.o kcl_drm_print.o kcl_drm_edid.o\ + kcl_drm_crtc.o kcl_drm_connector.o kcl_drm_atomic_helper.o \ + kcl_device_cgroup.o kcl_mn.o kcl_drm_modes.o kcl_time.o \ + kcl_acpi_table.o kcl_page_alloc.o kcl_numa.o kcl_fs_read_write.o kcl_drm_aperture.o \ + kcl_drm_simple_kms_helper.o kcl_bitmap.o kcl_vmscan.o kcl_dma_fence_chain.o \ + kcl_mce_amd.o kcl_workqueue.o kcl_cpumask.o kcl_drm_dsc_helper.o kcl_mm_slab.o \ + kcl_irqdesc.o kcl_drm_suballoc.o kcl_drm_exec.o kcl_drm_dp_helper.o kcl_drm_prime.o \ + kcl_drm_vblank.o + +amdkcl-$(CONFIG_DRM_AMD_DC_HDCP) += kcl_drm_hdcp.o +amdkcl-$(CONFIG_MMU_NOTIFIER) += 
kcl_mn.o +amdkcl-$(CONFIG_DEBUG_FS) += kcl_debugfs_inode.o kcl_debugfs_file.o +amdkcl-$(CONFIG_SYSFS) += kcl_sysfs_emit.o +amdkcl-$(CONFIG_AMD_WBRF) += kcl_wbrf.o +CFLAGS_kcl_fence.o := -I$(src) + +ccflags-y += \ + -include $(src)/../dkms/config/config.h \ + -include $(src)/kcl_common.h + +ccflags-y += -DHAVE_CONFIG_H + +obj-m += amdkcl.o diff --git a/drivers/gpu/drm/amd/amdkcl/files b/drivers/gpu/drm/amd/amdkcl/files new file mode 100644 index 0000000000000..20534a90dbb86 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/files @@ -0,0 +1 @@ +FILES="ttm/*.c scheduler/*.c amd/amdkcl/dma-buf/dma-resv.c drm_gem_ttm_helper.c drm_buddy.c" diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_acpi_table.c b/drivers/gpu/drm/amd/amdkcl/kcl_acpi_table.c new file mode 100644 index 0000000000000..554bebabd4adb --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_acpi_table.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/****************************************************************************** + * + * Module Name: tbxface - ACPI table-oriented external interfaces + * + * Copyright (C) 2000 - 2020, Intel Corp. + * + *****************************************************************************/ +#include +#include + +#ifndef HAVE_ACPI_PUT_TABLE +amdkcl_dummy_symbol(acpi_put_table, void, return, + struct acpi_table_header *table) +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_backlight.c b/drivers/gpu/drm/amd/amdkcl/kcl_backlight.c new file mode 100644 index 0000000000000..1e1da40b92c05 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_backlight.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Backlight Lowlevel Control Abstraction + * + * Copyright (C) 2003,2004 Hewlett-Packard Company + * + */ +#include + +#ifndef HAVE_BACKLIGHT_DEVICE_SET_BRIGHTNESS +amdkcl_dummy_symbol(backlight_device_set_brightness, int, return 0, + struct backlight_device *bd, unsigned long brightness) +#endif + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_bitmap.c b/drivers/gpu/drm/amd/amdkcl/kcl_bitmap.c new file mode 100644 index 0000000000000..2b0c29936bc96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_bitmap.c @@ -0,0 +1,75 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
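
The amdgpu hunks above all apply the same mechanical conversion: amd_ip_funcs callbacks receive the struct amdgpu_ip_block pointer directly instead of an opaque void *handle, and fetch the device through ip_block->adev. A minimal sketch of the resulting shape, using a hypothetical "foo" IP block rather than any real driver::

	static int foo_hw_fini(struct amdgpu_ip_block *ip_block)
	{
		struct amdgpu_device *adev = ip_block->adev;

		foo_irq_disable(adev);	/* placeholder for the block's teardown */
		return 0;
	}

	/* suspend/resume now forward the ip_block, not the device */
	static int foo_suspend(struct amdgpu_ip_block *ip_block)
	{
		return foo_hw_fini(ip_block);
	}

The conversion drops the unchecked cast from void * and lets helpers reach per-block state through the same pointer.
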
+ */ + +#include + +#ifndef HAVE_BITMAP_FUNCS + +#include +#include +#include + +unsigned long *kcl_bitmap_alloc(unsigned int nbits, gfp_t flags) +{ + return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long), + flags); +} +EXPORT_SYMBOL(kcl_bitmap_alloc); + +unsigned long *kcl_bitmap_zalloc(unsigned int nbits, gfp_t flags) +{ + return kcl_bitmap_alloc(nbits, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(kcl_bitmap_zalloc); + +void kcl_bitmap_free(const unsigned long *bitmap) +{ + kfree(bitmap); +} +EXPORT_SYMBOL(kcl_bitmap_free); +#endif /* HAVE_BITMAP_FUNCS */ + +#ifndef HAVE_BITMAP_TO_ARR32 +#if BITS_PER_LONG == 64 +/** + * kcl_bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap + * @bitmap: array of unsigned longs, the source bitmap + * @nbits: number of bits in @bitmap + */ +void kcl_bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) +{ + unsigned int i, halfwords; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + buf[i] = (u32) (bitmap[i/2] & UINT_MAX); + if (++i < halfwords) + buf[i] = (u32) (bitmap[i/2] >> 32); + } + + /* Clear tail bits in last element of array beyond nbits. */ + if (nbits % BITS_PER_LONG) + buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); +} +EXPORT_SYMBOL(kcl_bitmap_to_arr32); +#endif +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_common.c b/drivers/gpu/drm/amd/amdkcl/kcl_common.c new file mode 100644 index 0000000000000..3d11e965887ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_common.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: MIT */ +#include +#include +#include +#include +#include + +static unsigned long _kcl_kallsyms_lookup_name(const char *name) +{ + unsigned long addr = 0; +#ifndef HAVE_KALLSYMS_LOOKUP_NAME + struct kprobe kp; + int r; + + memset(&kp, 0, sizeof(kp)); + kp.symbol_name = name; + r = register_kprobe(&kp); + if (!r) { + addr = (unsigned long)kp.addr; + unregister_kprobe(&kp); + } +#else + addr = kallsyms_lookup_name(name); +#endif + + return addr; +} + +void *amdkcl_fp_setup(const char *symbol, void *dummy) +{ + unsigned long addr; + void *fp = dummy; + + addr = _kcl_kallsyms_lookup_name(symbol); + if (addr == 0) { + if (fp) + pr_warn("Warning: fail to get symbol %s, replace it with kcl stub\n", symbol); + else { + pr_err("Error: fail to get symbol %s, abort...\n", symbol); + BUG(); + } + } else { + fp = (void *)addr; + } + + return fp; +} + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_common.h b/drivers/gpu/drm/amd/amdkcl/kcl_common.h new file mode 100644 index 0000000000000..9c9eca94212b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_common.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDKCL_COMMON_H +#define AMDKCL_COMMON_H + +#include +#include + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "amdkcl: " fmt + +void *amdkcl_fp_setup(const char *symbol, void *dummy); + +/* + * create dummy func + */ +#define amdkcl_dummy_symbol(name, ret_type, ret, ...) 
\ +ret_type name(__VA_ARGS__) \ +{ \ + pr_warn_once("%s is not supported\n", #name); \ + ret ;\ +} \ +EXPORT_SYMBOL(name); + +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c new file mode 100644 index 0000000000000..f740a9626cd10 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#ifndef for_each_cpu_wrap +/* copied from lib/cpumask.c */ +/** + * cpumask_next_wrap - helper to implement for_each_cpu_wrap + * @n: the cpu prior to the place to search + * @mask: the cpumask pointer + * @start: the start point of the iteration + * @wrap: assume @n crossing @start terminates the iteration + * + * Returns >= nr_cpu_ids on completion + * + * Note: the @wrap argument is required for the start condition when + * we cannot assume @start is set in @mask. + */ +int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + int next; + +again: + next = cpumask_next(n, mask); + + if (wrap && n < start && next >= start) { + return nr_cpumask_bits; + + } else if (next >= nr_cpumask_bits) { + wrap = true; + n = -1; + goto again; + } + + return next; +} +EXPORT_SYMBOL(_kcl_cpumask_next_wrap); +#endif + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_file.c b/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_file.c new file mode 100644 index 0000000000000..def9db4463a22 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_file.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * inode.c - part of debugfs, a tiny little debug file system + * + * Copyright (C) 2004,2019 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Inc. + * Copyright (C) 2019 Linux Foundation + * + * debugfs is for people to use instead of /proc or /sys. + * See ./Documentation/core-api/kernel-api.rst for more details. 
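
The amdkcl_dummy_symbol() macro defined above expands to an exported stub that warns once and returns a fixed value, which is how the shim files earlier in this patch (kcl_acpi_table.c, kcl_backlight.c) cover missing exports. A hedged example; HAVE_FOO_EXPORT and foo_export are placeholders, not real kernel symbols::

	#ifndef HAVE_FOO_EXPORT
	/* expands to:
	 *   int foo_export(struct device *dev)
	 *   {
	 *           pr_warn_once("foo_export is not supported\n");
	 *           return -EOPNOTSUPP;
	 *   }
	 *   EXPORT_SYMBOL(foo_export);
	 */
	amdkcl_dummy_symbol(foo_export, int, return -EOPNOTSUPP,
			    struct device *dev)
	#endif
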
+ */ + +#include +#include +#include + +#ifdef KCL_FAKE_DEBUGFS_ATTRIBUTE_SIGNED +/* Copied from fs/libfs.c */ +struct simple_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) +{ + struct simple_attr *attr; + unsigned long long val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + attr->set_buf[size] = '\0'; + if (is_signed) + ret = kstrtoll(attr->set_buf, 0, &val); + else + ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; + ret = attr->set(attr->data, val); + if (ret == 0) + ret = len; /* on success, claim we got the whole input */ +out: + mutex_unlock(&attr->mutex); + return ret; +} + +ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return simple_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(simple_attr_write_signed); + +/* Copied from fs/debugfs/file.c */ +#define F_DENTRY(filp) ((filp)->f_path.dentry) +static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) +{ + struct dentry *dentry = F_DENTRY(file); + ssize_t ret; + + ret = debugfs_file_get(dentry); + if (unlikely(ret)) + return ret; + if (is_signed) + ret = simple_attr_write_signed(file, buf, len, ppos); + else + ret = simple_attr_write(file, buf, len, ppos); + debugfs_file_put(dentry); + return ret; +} + +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return debugfs_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_inode.c b/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_inode.c new file mode 100644 index 0000000000000..5d41d1e609712 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_debugfs_inode.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * inode.c - part of debugfs, a tiny little debug file system + * + * Copyright (C) 2004,2019 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Inc. + * Copyright (C) 2019 Linux Foundation + * + * debugfs is for people to use instead of /proc or /sys. + * See ./Documentation/core-api/kernel-api.rst for more details. 
+ */
+
+#include
+#include
+
+/* Copied from fs/debugfs/inode.c */
+#ifndef HAVE_DEBUGFS_CREATE_FILE_SIZE
+void debugfs_create_file_size(const char *name, umode_t mode,
+			      struct dentry *parent, void *data,
+			      const struct file_operations *fops,
+			      loff_t file_size)
+{
+	struct dentry *de = debugfs_create_file(name, mode, parent, data, fops);
+
+	if (de)
+		d_inode(de)->i_size = file_size;
+}
+EXPORT_SYMBOL_GPL(debugfs_create_file_size);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_device_cgroup.c b/drivers/gpu/drm/amd/amdkcl/kcl_device_cgroup.c
new file mode 100644
index 0000000000000..1fb1830aa5039
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_device_cgroup.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * device_cgroup.c - device cgroup subsystem
+ *
+ * Copyright 2007 IBM Corp
+ */
+#include
+#include
+
+#if defined(CONFIG_CGROUP_DEVICE) && \
+	!defined(HAVE_DEVCGROUP_CHECK_PERMISSION)
+/*
+ * __devcgroup_check_permission was introduced in v3.6-6796-gad676077a2ae
+ * as:
+ * static int __devcgroup_check_permission(struct dev_cgroup *dev_cgroup,
+ *					   short type, u32 major, u32 minor,
+ *					   short access)
+ *
+ * the prototype changed in v3.7-rc2-147-g8c9506d16925 to:
+ * static int __devcgroup_check_permission(short type, u32 major, u32 minor,
+ *					   short access)
+ *
+ * the current amdkcl doesn't support kernels earlier than v3.7-rc2-147-g8c9506d16925
+ */
+int (*__kcl_devcgroup_check_permission)(short type, u32 major, u32 minor,
+					short access);
+EXPORT_SYMBOL(__kcl_devcgroup_check_permission);
+#endif
+void amdkcl_dev_cgroup_init(void)
+{
+#if defined(CONFIG_CGROUP_DEVICE) && \
+	!defined(HAVE_DEVCGROUP_CHECK_PERMISSION)
+	__kcl_devcgroup_check_permission = amdkcl_fp_setup("__devcgroup_check_permission", NULL);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_dma-resv.c b/drivers/gpu/drm/amd/amdkcl/kcl_dma-resv.c
new file mode 100644
index 0000000000000..f2a2cdcbf3165
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_dma-resv.c
@@ -0,0 +1,934 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2012-2014 Canonical Ltd (Maarten Lankhorst)
+ *
+ * Based on bo.c which bears the following copyright notice,
+ * but is dual licensed:
+ *
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
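
kcl_device_cgroup.c above shows the other compat strategy: when the running kernel does not expose __devcgroup_check_permission(), its address is resolved at module init through amdkcl_fp_setup() (kallsyms, or a kprobe when kallsyms_lookup_name() itself is unavailable) and calls go through a function pointer. A sketch of a caller, assuming amdkcl_dev_cgroup_init() has already run; kcl_check_node() is illustrative only::

	static int kcl_check_node(u32 major, u32 minor)
	{
	#if defined(CONFIG_CGROUP_DEVICE) && \
		!defined(HAVE_DEVCGROUP_CHECK_PERMISSION)
		/* the NULL dummy above means amdkcl_fp_setup() BUG()s on
		 * lookup failure instead of installing a warning stub */
		return __kcl_devcgroup_check_permission(DEVCG_DEV_CHAR,
							major, minor,
							DEVCG_ACC_READ);
	#else
		return devcgroup_check_permission(DEVCG_DEV_CHAR,
						  major, minor,
						  DEVCG_ACC_READ);
	#endif
	}
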
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom
+ */
+
+/* Copied from drivers/dma-buf/dma-resv.c */
+#ifndef HAVE_DMA_RESV_FENCES
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * DOC: Reservation Object Overview
+ *
+ * The reservation object provides a mechanism to manage a container of
+ * dma_fence objects associated with a resource. A reservation object
+ * can have any number of fences attached to it. Each fence carries a usage
+ * parameter determining how the operation represented by the fence is using the
+ * resource. The RCU mechanism is used to protect read access to fences from
+ * locked write-side updates.
+ *
+ * See struct dma_resv for more details.
+ */
+
+extern struct ww_class reservation_ww_class;
+
+/**
+ * dma_resv_list_alloc - allocate fence list
+ * @shared_max: number of fences we need space for
+ *
+ * Allocate a new dma_resv_list and make sure to correctly initialize
+ * shared_max.
+ */
+static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max)
+{
+	struct dma_resv_list *list;
+
+	list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL);
+	if (!list)
+		return NULL;
+
+	list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) /
+		sizeof(*list->shared);
+
+	return list;
+}
+
+/**
+ * dma_resv_list_free - free fence list
+ * @list: list to free
+ *
+ * Free a dma_resv_list and make sure to drop all references.
+ */
+static void dma_resv_list_free(struct dma_resv_list *list)
+{
+	unsigned int i;
+
+	if (!list)
+		return;
+
+	for (i = 0; i < list->shared_count; ++i)
+		dma_fence_put(rcu_dereference_protected(list->shared[i], true));
+
+	kfree_rcu(list, rcu);
+}
+
+/**
+ * dma_resv_init - initialize a reservation object
+ * @obj: the reservation object
+ */
+void dma_resv_init(struct dma_resv *obj)
+{
+	ww_mutex_init(&obj->lock, &reservation_ww_class);
+	seqcount_ww_mutex_init(&obj->seq, &obj->lock);
+
+	RCU_INIT_POINTER(obj->fence, NULL);
+	RCU_INIT_POINTER(obj->fence_excl, NULL);
+}
+EXPORT_SYMBOL(dma_resv_init);
+
+/**
+ * dma_resv_fini - destroys a reservation object
+ * @obj: the reservation object
+ */
+void dma_resv_fini(struct dma_resv *obj)
+{
+	struct dma_resv_list *fobj;
+	struct dma_fence *excl;
+
+	/*
+	 * This object should be dead and all references must have
+	 * been released to it, so no need to be protected with rcu.
+	 */
+	excl = rcu_dereference_protected(obj->fence_excl, 1);
+	if (excl)
+		dma_fence_put(excl);
+
+	fobj = rcu_dereference_protected(obj->fence, 1);
+	dma_resv_list_free(fobj);
+	ww_mutex_destroy(&obj->lock);
+}
+EXPORT_SYMBOL(dma_resv_fini);
+
+/**
+ * dma_resv_reserve_fences - Reserve space to add shared fences to
+ * a dma_resv.
+ * @obj: reservation object
+ * @num_fences: number of fences we want to add
+ *
+ * Should be called before dma_resv_add_shared_fence(). Must
+ * be called with @obj locked through dma_resv_lock().
+ *
+ * Note that the preallocated slots need to be re-reserved if @obj is unlocked
+ * at any time before calling dma_resv_add_shared_fence(). This is validated
+ * when CONFIG_DEBUG_MUTEXES is enabled.
+ * + * RETURNS + * Zero for success, or -errno + */ +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) +{ + struct dma_resv_list *old, *new; + unsigned int i, j, k, max; + + dma_resv_assert_held(obj); + + old = dma_resv_shared_list(obj); + if (old && old->shared_max) { + if ((old->shared_count + num_fences) <= old->shared_max) + return 0; + max = max(old->shared_count + num_fences, old->shared_max * 2); + } else { + max = max(4ul, roundup_pow_of_two(num_fences)); + } + + new = dma_resv_list_alloc(max); + if (!new) + return -ENOMEM; + + /* + * no need to bump fence refcounts, rcu_read access + * requires the use of kref_get_unless_zero, and the + * references from the old struct are carried over to + * the new. + */ + for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) { + struct dma_fence *fence; + + fence = rcu_dereference_protected(old->shared[i], + dma_resv_held(obj)); + if (dma_fence_is_signaled(fence)) + RCU_INIT_POINTER(new->shared[--k], fence); + else + RCU_INIT_POINTER(new->shared[j++], fence); + } + new->shared_count = j; + + /* + * We are not changing the effective set of fences here so can + * merely update the pointer to the new array; both existing + * readers and new readers will see exactly the same set of + * active (unsignaled) shared fences. Individual fences and the + * old array are protected by RCU and so will not vanish under + * the gaze of the rcu_read_lock() readers. + */ + rcu_assign_pointer(obj->fence, new); + + if (!old) + return 0; + + /* Drop the references to the signaled fences */ + for (i = k; i < max; ++i) { + struct dma_fence *fence; + + fence = rcu_dereference_protected(new->shared[i], + dma_resv_held(obj)); + dma_fence_put(fence); + } + kfree_rcu(old, rcu); + + return 0; +} +EXPORT_SYMBOL(dma_resv_reserve_fences); + +#ifdef CONFIG_DEBUG_MUTEXES +/** + * dma_resv_reset_max_fences - reset shared fences for debugging + * @obj: the dma_resv object to reset + * + * Reset the number of pre-reserved shared slots to test that drivers do + * correct slot allocation using dma_resv_reserve_fences(). See also + * &dma_resv_list.shared_max. + */ +void dma_resv_reset_max_fences(struct dma_resv *obj) +{ + struct dma_resv_list *fences = dma_resv_shared_list(obj); + + dma_resv_assert_held(obj); + + /* Test shared fence slot reservation */ + if (fences) + fences->shared_max = fences->shared_count; +} +EXPORT_SYMBOL(dma_resv_reset_max_fences); +#endif + +/** + * dma_resv_add_shared_fence - Add a fence to a shared slot + * @obj: the reservation object + * @fence: the shared fence to add + * + * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and + * dma_resv_reserve_fences() has been called. + * + * See also &dma_resv.fence for a discussion of the semantics. + */ +static void dma_resv_add_shared_fence(struct dma_resv *obj, + struct dma_fence *fence) +{ + struct dma_resv_list *fobj; + struct dma_fence *old; + unsigned int i, count; + + dma_fence_get(fence); + + dma_resv_assert_held(obj); + + /* Drivers should not add containers here, instead add each fence + * individually. 
+ */
+	WARN_ON(dma_fence_is_container(fence));
+
+	fobj = dma_resv_shared_list(obj);
+	count = fobj->shared_count;
+
+	write_seqcount_begin(&obj->seq);
+
+	for (i = 0; i < count; ++i) {
+
+		old = rcu_dereference_protected(fobj->shared[i],
+						dma_resv_held(obj));
+		if (old->context == fence->context ||
+		    dma_fence_is_signaled(old))
+			goto replace;
+	}
+
+	BUG_ON(fobj->shared_count >= fobj->shared_max);
+	old = NULL;
+	count++;
+
+replace:
+	RCU_INIT_POINTER(fobj->shared[i], fence);
+	/* pointer update must be visible before we extend the shared_count */
+	smp_store_mb(fobj->shared_count, count);
+
+	write_seqcount_end(&obj->seq);
+	dma_fence_put(old);
+}
+
+/**
+ * dma_resv_replace_fences - replace fences in the dma_resv obj
+ * @obj: the reservation object
+ * @context: the context of the fences to replace
+ * @replacement: the new fence to use instead
+ * @usage: how the new fence is used, see enum dma_resv_usage
+ *
+ * Replace fences with a specified context with a new fence. Only valid if the
+ * operation represented by the original fence no longer has access to the
+ * resources represented by the dma_resv object when the new fence completes.
+ *
+ * An example of using this is replacing a preemption fence with a page table
+ * update fence which makes the resource inaccessible.
+ */
+void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
+			     struct dma_fence *replacement,
+			     enum dma_resv_usage usage)
+{
+	struct dma_resv_list *list;
+	struct dma_fence *old;
+	unsigned int i;
+
+	/* Only readers supported for now */
+	WARN_ON(usage != DMA_RESV_USAGE_READ && usage != DMA_RESV_USAGE_BOOKKEEP);
+
+	dma_resv_assert_held(obj);
+
+	write_seqcount_begin(&obj->seq);
+
+	old = dma_resv_excl_fence(obj);
+	if (old && old->context == context) {
+		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
+		dma_fence_put(old);
+	}
+
+	list = dma_resv_shared_list(obj);
+	for (i = 0; list && i < list->shared_count; ++i) {
+		old = rcu_dereference_protected(list->shared[i],
+						dma_resv_held(obj));
+		if (old->context != context)
+			continue;
+
+		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
+		dma_fence_put(old);
+	}
+
+	write_seqcount_end(&obj->seq);
+}
+EXPORT_SYMBOL(dma_resv_replace_fences);
+
+/**
+ * dma_resv_add_fence - Add a fence to the dma_resv obj
+ * @obj: the reservation object
+ * @fence: the fence to add
+ * @usage: how the fence is used, see enum dma_resv_usage
+ *
+ * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
+ * dma_resv_reserve_fences() has been called.
+ *
+ * See also &dma_resv.fence for a discussion of the semantics.
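
In practice the write side of this backport follows the same lock/reserve/add sequence as mainline. A minimal sketch, assuming the caller holds a reference on the fence; kcl_attach_write_fence() is illustrative only::

	static int kcl_attach_write_fence(struct dma_resv *resv,
					  struct dma_fence *fence)
	{
		int ret;

		ret = dma_resv_lock(resv, NULL);
		if (ret)
			return ret;

		/* reserving a slot keeps the pattern uniform even though the
		 * WRITE case here installs a dma_fence_chain node instead */
		ret = dma_resv_reserve_fences(resv, 1);
		if (!ret)
			dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_WRITE);

		dma_resv_unlock(resv);
		return ret;
	}
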
+ */ +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) +{ + struct dma_fence_chain *chain; + + if (usage >= DMA_RESV_USAGE_READ) { + dma_resv_add_shared_fence(obj, fence); + return; + } + + chain = dma_fence_chain_alloc(); + if (unlikely(!chain)) { + /* We are out of memory, block as last resort */ + dma_fence_wait(fence, false); + return; + } + dma_fence_chain_init(chain, dma_resv_excl_fence(obj), dma_fence_get(fence), 1); + + /* Store the usage in the user bit to retrieve it later on */ + chain->base.flags |= usage << DMA_FENCE_FLAG_USER_BITS; + + /* Install the exclusive fence manually */ + write_seqcount_begin(&obj->seq); + RCU_INIT_POINTER(obj->fence_excl, &chain->base); + write_seqcount_end(&obj->seq); +} +EXPORT_SYMBOL(dma_resv_add_fence); + +/* Restart the iterator by initializing all the necessary fields, but not the + * relation to the dma_resv object. */ +static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) +{ + cursor->seq = read_seqcount_begin(&cursor->obj->seq); + cursor->index = -1; + cursor->shared_count = 0; + cursor->excl_fence = NULL; + cursor->kernel_iter = NULL; + if (cursor->usage >= DMA_RESV_USAGE_READ) { + cursor->fences = dma_resv_shared_list(cursor->obj); + if (cursor->fences) + cursor->shared_count = cursor->fences->shared_count; + } else { + cursor->fences = NULL; + } + cursor->is_restarted = true; +} + +/* Walk to the next not signaled fence and grab a reference to it */ +static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) +{ + struct dma_resv *obj = cursor->obj; + struct dma_fence_chain *chain; + struct dma_fence *f; + enum dma_resv_usage usage; + + do { + /* Drop the reference from the previous round */ + dma_fence_put(cursor->fence); + + if (cursor->index == -1) { + if (cursor->usage >= DMA_RESV_USAGE_WRITE) { + cursor->fence = dma_resv_excl_fence(obj); + cursor->index++; + if (!cursor->fence) + continue; + } else { + cursor->fence = NULL; + /* Only return KERNEL fences */ + if (!cursor->excl_fence) { + cursor->excl_fence = dma_resv_excl_fence(obj); + if (!cursor->excl_fence) + break; + + cursor->excl_fence = dma_fence_get(cursor->excl_fence); + cursor->kernel_iter = dma_fence_get(cursor->excl_fence); + } + + while ((f = cursor->kernel_iter) != NULL) { + chain = to_dma_fence_chain(f); + if (!chain) { + dma_fence_put(f); + break; + } + + usage = chain->base.flags >> DMA_FENCE_FLAG_USER_BITS; + if (usage == DMA_RESV_USAGE_KERNEL && !dma_fence_is_signaled(chain->fence)) + cursor->fence = chain->fence; + + cursor->kernel_iter = dma_fence_chain_walk(f); + + if (cursor->fence) + break; + } + + if (!cursor->fence) { + dma_fence_put(cursor->excl_fence); + cursor->excl_fence = NULL; + break; + } + } + } else if (!cursor->fences || + cursor->index >= cursor->shared_count) { + cursor->fence = NULL; + break; + + } else { + struct dma_resv_list *fences = cursor->fences; + unsigned int idx = cursor->index++; + + cursor->fence = rcu_dereference(fences->shared[idx]); + } + cursor->fence = dma_fence_get_rcu(cursor->fence); + if (!cursor->fence || !dma_fence_is_signaled(cursor->fence)) + break; + } while (true); +} + +/** + * dma_resv_iter_first_unlocked - first fence in an unlocked dma_resv obj. + * @cursor: the cursor with the current position + * + * Subsequent fences are iterated with dma_resv_iter_next_unlocked(). + * + * Beware that the iterator can be restarted. Code which accumulates statistics + * or similar needs to check for this with dma_resv_iter_is_restarted(). 
For
+ * this reason prefer the locked dma_resv_iter_first() whenever possible.
+ *
+ * Returns the first fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart = false;
+
+	rcu_read_lock();
+	do {
+		if (restart) {
+			/* drop reference when iter restart */
+			dma_fence_put(cursor->excl_fence);
+			dma_fence_put(cursor->kernel_iter);
+		}
+		dma_resv_iter_restart_unlocked(cursor);
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_first_unlocked);
+
+/**
+ * dma_resv_iter_next_unlocked - next fence in an unlocked dma_resv obj.
+ * @cursor: the cursor with the current position
+ *
+ * Beware that the iterator can be restarted. Code which accumulates statistics
+ * or similar needs to check for this with dma_resv_iter_is_restarted(). For
+ * this reason prefer the locked dma_resv_iter_next() whenever possible.
+ *
+ * Returns the next fence from an unlocked dma_resv obj.
+ */
+struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
+{
+	bool restart;
+
+	rcu_read_lock();
+	cursor->is_restarted = false;
+	restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq);
+	do {
+		if (restart) {
+			/* drop reference when iter restart */
+			dma_fence_put(cursor->excl_fence);
+			dma_fence_put(cursor->kernel_iter);
+
+			dma_resv_iter_restart_unlocked(cursor);
+		}
+		dma_resv_iter_walk_unlocked(cursor);
+		restart = true;
+	} while (read_seqcount_retry(&cursor->obj->seq, cursor->seq));
+	rcu_read_unlock();
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
+
+/**
+ * dma_resv_iter_first - first fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Subsequent fences are iterated with dma_resv_iter_next().
+ *
+ * Return the first fence in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence, *f;
+	struct dma_fence_chain *chain;
+	enum dma_resv_usage usage;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = 0;
+	if (cursor->usage >= DMA_RESV_USAGE_READ)
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+	else
+		cursor->fences = NULL;
+
+	cursor->kernel_iter = NULL;
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
+	else if (cursor->usage == DMA_RESV_USAGE_KERNEL) {
+		cursor->kernel_iter = dma_fence_get(fence);
+		fence = NULL;
+
+		while ((f = cursor->kernel_iter) != NULL) {
+			chain = to_dma_fence_chain(f);
+			if (!chain) {
+				dma_fence_put(f);
+				break;
+			}
+
+			cursor->kernel_iter = dma_fence_chain_walk(f);
+
+			usage = chain->base.flags >> DMA_FENCE_FLAG_USER_BITS;
+			if (usage == DMA_RESV_USAGE_KERNEL)
+				fence = chain->fence;
+
+			if (fence)
+				break;
+		}
+	}
+
+	cursor->is_restarted = true;
+
+	return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_first);
+
+/**
+ * dma_resv_iter_next - next fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return the next fence from the dma_resv object while holding the
+ * &dma_resv.lock.
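
Because the unlocked iterators can restart whenever the seqcount changes, accumulated state must be discarded on restart, exactly as the kernel-doc above warns. A sketch of that pattern; kcl_count_busy() is illustrative only::

	static unsigned int kcl_count_busy(struct dma_resv *resv)
	{
		struct dma_resv_iter cursor;
		struct dma_fence *fence;
		unsigned int busy = 0;

		dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
		dma_resv_for_each_fence_unlocked(&cursor, fence) {
			/* a concurrent writer invalidated the walk */
			if (dma_resv_iter_is_restarted(&cursor))
				busy = 0;
			if (!dma_fence_is_signaled(fence))
				busy++;
		}
		dma_resv_iter_end(&cursor);

		return busy;
	}
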
+ */ +struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor) +{ + unsigned int idx; + struct dma_fence *f; + struct dma_fence_chain *chain; + enum dma_resv_usage usage; + + dma_resv_assert_held(cursor->obj); + + cursor->is_restarted = false; + + if (cursor->usage == DMA_RESV_USAGE_KERNEL && cursor->kernel_iter != NULL) { + while ((f = cursor->kernel_iter) != NULL) { + chain = to_dma_fence_chain(f); + if (!chain) { + dma_fence_put(f); + break; + } + + cursor->kernel_iter = dma_fence_chain_walk(f); + + usage = chain->base.flags >> DMA_FENCE_FLAG_USER_BITS; + if (usage == DMA_RESV_USAGE_KERNEL && chain->fence) + return chain->fence; + } + } + + if (!cursor->fences || cursor->index >= cursor->fences->shared_count) + return NULL; + + idx = cursor->index++; + return rcu_dereference_protected(cursor->fences->shared[idx], + dma_resv_held(cursor->obj)); +} +EXPORT_SYMBOL_GPL(dma_resv_iter_next); + +/** + * dma_resv_copy_fences - Copy all fences from src to dst. + * @dst: the destination reservation object + * @src: the source reservation object + * + * Copy all fences from src to dst. dst-lock must be held. + */ +int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) +{ + struct dma_resv_iter cursor; + struct dma_resv_list *list; + struct dma_fence *f, *excl; + + dma_resv_assert_held(dst); + + list = NULL; + excl = NULL; + + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); + dma_resv_for_each_fence_unlocked(&cursor, f) { + + if (dma_resv_iter_is_restarted(&cursor)) { + dma_resv_list_free(list); + dma_fence_put(excl); + + if (cursor.shared_count) { + list = dma_resv_list_alloc(cursor.shared_count); + if (!list) { + dma_resv_iter_end(&cursor); + return -ENOMEM; + } + + list->shared_count = 0; + + } else { + list = NULL; + } + excl = NULL; + } + + dma_fence_get(f); + if (dma_resv_iter_is_exclusive(&cursor)) + excl = f; + else + RCU_INIT_POINTER(list->shared[list->shared_count++], f); + } + dma_resv_iter_end(&cursor); + + write_seqcount_begin(&dst->seq); + excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); + write_seqcount_end(&dst->seq); + + dma_resv_list_free(list); + dma_fence_put(excl); + + return 0; +} +EXPORT_SYMBOL(dma_resv_copy_fences); + +/** + * dma_resv_get_fences - Get an object's shared and exclusive + * fences without update side lock held + * @obj: the reservation object + * @usage: controls which fences to include, see enum dma_resv_usage. + * @num_fences: the number of fences returned + * @fences: the array of fence ptrs returned (array is krealloc'd to the + * required size, and must be freed by caller) + * + * Retrieve all fences from the reservation object. + * Returns either zero or -ENOMEM. 
+ */
+int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
+			unsigned int *num_fences, struct dma_fence ***fences)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+
+	*num_fences = 0;
+	*fences = NULL;
+
+	dma_resv_iter_begin(&cursor, obj, usage);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+
+		if (dma_resv_iter_is_restarted(&cursor)) {
+			unsigned int count;
+
+			while (*num_fences)
+				dma_fence_put((*fences)[--(*num_fences)]);
+
+			count = cursor.shared_count + 1;
+
+			/* Eventually re-allocate the array */
+			*fences = krealloc_array(*fences, count,
+						 sizeof(void *),
+						 GFP_KERNEL);
+			if (count && !*fences) {
+				dma_resv_iter_end(&cursor);
+				return -ENOMEM;
+			}
+		}
+
+		(*fences)[(*num_fences)++] = dma_fence_get(fence);
+	}
+	dma_resv_iter_end(&cursor);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_resv_get_fences);
+
+/**
+ * dma_resv_get_singleton - Get a single fence for all the fences
+ * @obj: the reservation object
+ * @usage: controls which fences to include, see enum dma_resv_usage.
+ * @fence: the resulting fence
+ *
+ * Get a single fence representing all the fences inside the resv object.
+ * Returns either 0 for success or -ENOMEM.
+ *
+ * Warning: This can't be used like this when adding the fence back to the resv
+ * object since that can lead to stack corruption when finalizing the
+ * dma_fence_array.
+ *
+ * Returns 0 on success and negative error values on failure.
+ */
+int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
+			   struct dma_fence **fence)
+{
+	struct dma_fence_array *array;
+	struct dma_fence **fences;
+	unsigned count;
+	int r;
+
+	r = dma_resv_get_fences(obj, usage, &count, &fences);
+	if (r)
+		return r;
+
+	if (count == 0) {
+		*fence = NULL;
+		return 0;
+	}
+
+	if (count == 1) {
+		*fence = fences[0];
+		kfree(fences);
+		return 0;
+	}
+
+	array = dma_fence_array_create(count, fences,
+				       dma_fence_context_alloc(1),
+				       1, false);
+	if (!array) {
+		while (count--)
+			dma_fence_put(fences[count]);
+		kfree(fences);
+		return -ENOMEM;
+	}
+
+	*fence = &array->base;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
+
+/**
+ * dma_resv_wait_timeout - Wait on a reservation object's
+ * shared and/or exclusive fences.
+ * @obj: the reservation object
+ * @usage: controls which fences to include, see enum dma_resv_usage.
+ * @intr: if true, do interruptible wait
+ * @timeout: timeout value in jiffies or zero to return immediately
+ *
+ * Callers are not required to hold specific locks, but may hold
+ * dma_resv_lock() already.
+ * RETURNS
+ * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or
+ * greater than zero on success.
+ */
+long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
+			   bool intr, unsigned long timeout)
+{
+	long ret = timeout ? timeout : 1;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+
+	dma_resv_iter_begin(&cursor, obj, usage);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+
+		ret = dma_fence_wait_timeout(fence, intr, ret);
+		if (ret <= 0) {
+			dma_resv_iter_end(&cursor);
+			return ret;
+		}
+	}
+	dma_resv_iter_end(&cursor);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
+
+/**
+ * dma_resv_test_signaled - Test if a reservation object's fences have been
+ * signaled.
+ * @obj: the reservation object
+ * @usage: controls which fences to include, see enum dma_resv_usage.
+ *
+ * Callers are not required to hold specific locks, but may hold
+ * dma_resv_lock() already.
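
dma_resv_test_signaled() and dma_resv_wait_timeout() above pair naturally: a cheap non-blocking probe, then a bounded wait that distinguishes timeout (0) from error (<0). A hedged sketch assuming a 100 ms budget; kcl_wait_idle() is illustrative only::

	static int kcl_wait_idle(struct dma_resv *resv)
	{
		long ret;

		if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
			return 0;	/* nothing pending */

		ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
					    true /* interruptible */,
					    msecs_to_jiffies(100));
		if (ret == 0)
			return -ETIMEDOUT;

		return ret < 0 ? ret : 0;
	}
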
+ * + * RETURNS + * + * True if all fences signaled, else false. + */ +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + dma_resv_iter_begin(&cursor, obj, usage); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + dma_resv_iter_end(&cursor); + return false; + } + dma_resv_iter_end(&cursor); + return true; +} +EXPORT_SYMBOL_GPL(dma_resv_test_signaled); + +/** + * dma_resv_describe - Dump description of the resv object into seq_file + * @obj: the reservation object + * @seq: the seq_file to dump the description into + * + * Dump a textual description of the fences inside an dma_resv object into the + * seq_file. + */ +void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) +{ + static const char *usage[] = { "write", "read" }; + struct dma_resv_iter cursor; + struct dma_fence *fence; + + dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { + seq_printf(seq, "\t%s fence:", + usage[dma_resv_iter_usage(&cursor)]); + dma_fence_describe(fence, seq); + } +} +EXPORT_SYMBOL_GPL(dma_resv_describe); + +#if IS_ENABLED(CONFIG_LOCKDEP) +static int __init dma_resv_lockdep(void) +{ + struct mm_struct *mm = mm_alloc(); + struct ww_acquire_ctx ctx; + struct dma_resv obj; + struct address_space mapping; + int ret; + + if (!mm) + return -ENOMEM; + + dma_resv_init(&obj); + address_space_init_once(&mapping); + + mmap_read_lock(mm); + ww_acquire_init(&ctx, &reservation_ww_class); + ret = dma_resv_lock(&obj, &ctx); + if (ret == -EDEADLK) + dma_resv_lock_slow(&obj, &ctx); + fs_reclaim_acquire(GFP_KERNEL); + /* for unmap_mapping_range on trylocked buffer objects in shrinkers */ + i_mmap_lock_write(&mapping); + i_mmap_unlock_write(&mapping); +#ifdef CONFIG_MMU_NOTIFIER + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); + __dma_fence_might_wait(); + lock_map_release(&__mmu_notifier_invalidate_range_start_map); +#else + __dma_fence_might_wait(); +#endif + fs_reclaim_release(GFP_KERNEL); + ww_mutex_unlock(&obj.lock); + ww_acquire_fini(&ctx); + mmap_read_unlock(mm); + + mmput(mm); + + return 0; +} +subsys_initcall(dma_resv_lockdep); +#endif + + + + +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_dma_fence_chain.c b/drivers/gpu/drm/amd/amdkcl/kcl_dma_fence_chain.c new file mode 100644 index 0000000000000..aef948cfe4ad2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_dma_fence_chain.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fence-chain: chain fences together in a timeline + * + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * Authors: + * Christian König + */ + +#if !defined(HAVE_STRUCT_DMA_FENCE_CHAIN) + +#include +#include + +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence); + +/** + * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence + * @chain: chain node to get the previous node from + * + * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the + * chain node. + */ +static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain) +{ + struct dma_fence *prev; + + rcu_read_lock(); + prev = dma_fence_get_rcu_safe(&chain->prev); + rcu_read_unlock(); + return prev; +} + +/** + * dma_fence_chain_walk - chain walking function + * @fence: current chain node + * + * Walk the chain to the next node. Returns the next fence or NULL if we are at + * the end of the chain. Garbage collects chain nodes which are already + * signaled. 
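
dma_fence_chain_walk() is usually driven through the dma_fence_chain_for_each() helper, which takes its own reference on the head and drops per-node references as it advances. A sketch measuring the remaining chain depth; kcl_chain_depth() is illustrative only::

	static unsigned int kcl_chain_depth(struct dma_fence *head)
	{
		struct dma_fence *fence;
		unsigned int depth = 0;

		/* reference handling is done by the iteration macro */
		dma_fence_chain_for_each(fence, head)
			depth++;

		return depth;
	}
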
+ */ +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence) +{ + struct dma_fence_chain *chain, *prev_chain; + struct dma_fence *prev, *replacement, *tmp; + + chain = to_dma_fence_chain(fence); + if (!chain) { + dma_fence_put(fence); + return NULL; + } + + while ((prev = dma_fence_chain_get_prev(chain))) { + + prev_chain = to_dma_fence_chain(prev); + if (prev_chain) { + if (!dma_fence_is_signaled(prev_chain->fence)) + break; + + replacement = dma_fence_chain_get_prev(prev_chain); + } else { + if (!dma_fence_is_signaled(prev)) + break; + + replacement = NULL; + } + + tmp = cmpxchg((struct dma_fence __force **)&chain->prev, + prev, replacement); + if (tmp == prev) + dma_fence_put(tmp); + else + dma_fence_put(replacement); + dma_fence_put(prev); + } + + dma_fence_put(fence); + return prev; +} +EXPORT_SYMBOL(dma_fence_chain_walk); + +/** + * dma_fence_chain_find_seqno - find fence chain node by seqno + * @pfence: pointer to the chain node where to start + * @seqno: the sequence number to search for + * + * Advance the fence pointer to the chain node which will signal this sequence + * number. If no sequence number is provided then this is a no-op. + * + * Returns EINVAL if the fence is not a chain node or the sequence number has + * not yet advanced far enough. + */ +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno) +{ + struct dma_fence_chain *chain; + + if (!seqno) + return 0; + + chain = to_dma_fence_chain(*pfence); + if (!chain || chain->base.seqno < seqno) + return -EINVAL; + + dma_fence_chain_for_each(*pfence, &chain->base) { + if ((*pfence)->context != chain->base.context || + to_dma_fence_chain(*pfence)->prev_seqno < seqno) + break; + } + dma_fence_put(&chain->base); + + return 0; +} +EXPORT_SYMBOL(dma_fence_chain_find_seqno); + +static const char *dma_fence_chain_get_driver_name(struct dma_fence *fence) +{ + return "dma_fence_chain"; +} + +static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void dma_fence_chain_irq_work(struct irq_work *work) +{ + struct dma_fence_chain *chain; + + chain = container_of(work, typeof(*chain), work); + + /* Try to rearm the callback */ + if (!dma_fence_chain_enable_signaling(&chain->base)) + /* Ok, we are done. No more unsignaled fences left */ + dma_fence_signal(&chain->base); + dma_fence_put(&chain->base); +} + +static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct dma_fence_chain *chain; + + chain = container_of(cb, typeof(*chain), cb); + init_irq_work(&chain->work, dma_fence_chain_irq_work); + irq_work_queue(&chain->work); + dma_fence_put(f); +} + +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence) +{ + struct dma_fence_chain *head = to_dma_fence_chain(fence); + + dma_fence_get(&head->base); + dma_fence_chain_for_each(fence, &head->base) { + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *f = chain ? chain->fence : fence; + + dma_fence_get(f); + if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) { + dma_fence_put(fence); + return true; + } + dma_fence_put(f); + } + dma_fence_put(&head->base); + return false; +} + +static bool dma_fence_chain_signaled(struct dma_fence *fence) +{ + dma_fence_chain_for_each(fence, fence) { + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *f = chain ? 
chain->fence : fence; + + if (!dma_fence_is_signaled(f)) { + dma_fence_put(fence); + return false; + } + } + + return true; +} + +static void dma_fence_chain_release(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev; + + /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain; + + if (kref_read(&prev->refcount) > 1) + break; + + prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break; + + /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev); + + dma_fence_put(chain->fence); + dma_fence_free(fence); +} + +const struct dma_fence_ops dma_fence_chain_ops = { +#ifdef HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO + .use_64bit_seqno = true, +#endif + .get_driver_name = dma_fence_chain_get_driver_name, + .get_timeline_name = dma_fence_chain_get_timeline_name, + .enable_signaling = dma_fence_chain_enable_signaling, + .signaled = dma_fence_chain_signaled, + .release = dma_fence_chain_release, +}; +EXPORT_SYMBOL(dma_fence_chain_ops); + +/** + * dma_fence_chain_init - initialize a fence chain + * @chain: the chain node to initialize + * @prev: the previous fence + * @fence: the current fence + * @seqno: the sequence number to use for the fence chain + * + * Initialize a new chain node and either start a new chain or add the node to + * the existing chain of the previous fence. + */ +void dma_fence_chain_init(struct dma_fence_chain *chain, + struct dma_fence *prev, + struct dma_fence *fence, + uint64_t seqno) +{ + struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev); + uint64_t context; + + spin_lock_init(&chain->lock); + rcu_assign_pointer(chain->prev, prev); + chain->fence = fence; + chain->prev_seqno = 0; + + /* Try to reuse the context of the previous chain node. */ +#ifdef HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO + if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) { +#else + if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) { +#endif + context = prev->context; + chain->prev_seqno = prev->seqno; + } else { + context = dma_fence_context_alloc(1); + /* Make sure that we always have a valid sequence number. */ + if (prev_chain) + seqno = max(prev->seqno, seqno); + } + + dma_fence_init(&chain->base, &dma_fence_chain_ops, + &chain->lock, context, seqno); +} +EXPORT_SYMBOL(dma_fence_chain_init); +#endif /* HAVE_STRUCT_DMA_FENCE_CHAIN */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_aperture.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_aperture.c new file mode 100644 index 0000000000000..91f2508079ff1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_aperture.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MIT + +#ifndef HAVE_DRM_DRM_APERTURE_H + +#include +#include + +#include +#include +#include "kcl_fbmem.h" + +int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name) +{ + int ret = 0; + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. 
+ */ +#if IS_REACHABLE(CONFIG_FB) + ret = _kcl_remove_conflicting_pci_framebuffers(pdev, name); +#endif +#ifdef HAVE_VGA_REMOVE_VGACON + if (ret == 0) + ret = vga_remove_vgacon(pdev); +#endif + return ret; +} +EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers); +#endif /* HAVE_DRM_DRM_APERTURE_H */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_atomic_helper.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_atomic_helper.c new file mode 100644 index 0000000000000..c0f145df309d3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_atomic_helper.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2018 Intel Corp. + * Copyright (C) 2014 Red Hat + * Copyright (C) 2014 Intel Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark + * Daniel Vetter + */ +#include +#include +#include + +#ifdef AMDKCL__DRM_ATOMIC_HELPER_PLANE_RESET +/* Copied from drivers/gpu/drm/drm_atomic_state_helper.c and modified for KCL */ +void _kcl__drm_atomic_helper_plane_reset(struct drm_plane *plane, + struct drm_plane_state *state) +{ + state->plane = plane; + state->rotation = DRM_MODE_ROTATE_0; + +#ifdef DRM_BLEND_ALPHA_OPAQUE + state->alpha = DRM_BLEND_ALPHA_OPAQUE; +#endif +#ifdef DRM_MODE_BLEND_PREMULTI + state->pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; +#endif + + plane->state = state; +} +EXPORT_SYMBOL(_kcl__drm_atomic_helper_plane_reset); +#endif + +#ifndef HAVE___DRM_ATOMIC_HELPER_CRTC_RESET +/* Copied from drivers/gpu/drm/drm_atomic_state_helper.c */ +void +__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + if (crtc_state) + crtc_state->crtc = crtc; + + crtc->state = crtc_state; +} +EXPORT_SYMBOL(__drm_atomic_helper_crtc_reset); +#endif + +#ifndef HAVE_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS +/* + * This implementation is duplicated from v5.9-rc5-1595-ge1ad957d45f7 + * "Extract drm_atomic_helper_calc_timestamping_constants()" + * + */ +void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *state) +{ + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; + +#if !defined(for_each_new_crtc_in_state) + struct drm_device *dev = state->dev; + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + new_crtc_state = crtc->state; +#else + int i; + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { +#endif + if (new_crtc_state->enable) + drm_calc_timestamping_constants(crtc, + &new_crtc_state->adjusted_mode); + } +} 
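
With this fallback in place, a driver can call the helper unconditionally from its commit path: on older kernels the list_for_each_entry() branch walks every CRTC, on newer ones only the CRTCs in the commit are touched. A sketch of the call site, assuming an otherwise standard atomic commit tail::

	static void kcl_commit_tail(struct drm_atomic_state *state)
	{
		struct drm_device *dev = state->dev;

		drm_atomic_helper_calc_timestamping_constants(state);

		drm_atomic_helper_commit_modeset_disables(dev, state);
		drm_atomic_helper_commit_modeset_enables(dev, state);
		drm_atomic_helper_commit_planes(dev, state, 0);
	}
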
+EXPORT_SYMBOL(drm_atomic_helper_calc_timestamping_constants); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_cache.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_cache.c new file mode 100644 index 0000000000000..85894bf3907c6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_cache.c @@ -0,0 +1,66 @@ +/* + * \file drm_memory.c + * Memory management wrappers for DRM + * + * \author Rickard E. (Rik) Faith + * \author Gareth Hughes + */ + +/* + * Created: Thu Feb 4 14:00:34 1999 by faith@valinux.com + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include + +#include +#include + +/* Copied from drivers/gpu/drm/drm_memory.c */ +#if !defined(HAVE_DRM_NEED_SWIOTLB) +bool drm_need_swiotlb(int dma_bits) +{ + struct resource *tmp; + resource_size_t max_iomem = 0; + + /* + * Xen paravirtual hosts require swiotlb regardless of requested dma + * transfer size. + * + * NOTE: Really, what it requires is use of the dma_alloc_coherent + * allocator used in ttm_dma_populate() instead of + * ttm_populate_and_map_pages(), which bounce buffers so much in + * Xen it leads to swiotlb buffer exhaustion. + */ + if (xen_pv_domain()) + return true; + + for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling) { + max_iomem = max(max_iomem, tmp->end); + } + + return max_iomem > ((u64)1 << dma_bits); +} +EXPORT_SYMBOL(drm_need_swiotlb); +#endif /* HAVE_DRM_NEED_SWIOTLB */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_connector.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_connector.c new file mode 100644 index 0000000000000..7616f113d4afb --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_connector.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2016 Intel Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. 
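For context, drm_need_swiotlb() above is normally consulted once at device init to decide whether TTM allocations must bounce through swiotlb. A minimal sketch; the 40-bit width is an arbitrary example, a real driver passes its device's DMA addressing width:

```c
/* Illustrative: a device limited to 40-bit DMA addresses needs swiotlb
 * bounce buffering if any I/O memory resource ends above 1ULL << 40,
 * or unconditionally on a Xen PV host, per the helper above. */
static bool example_needs_bounce(void)
{
	return drm_need_swiotlb(40);
}
```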
It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ +#include + +#ifndef HAVE_DRM_CONNECTOR_INIT_WITH_DDC +int _kcl_drm_connector_init_with_ddc(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) +{ + return drm_connector_init(dev, connector, funcs, connector_type); +} +EXPORT_SYMBOL(_kcl_drm_connector_init_with_ddc); +#endif + +#ifndef HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY +amdkcl_dummy_symbol(drm_connector_attach_dp_subconnector_property, void, return, + struct drm_connector *connector) +amdkcl_dummy_symbol(drm_dp_set_subconnector_property, void, return, + struct drm_connector *connector, enum drm_connector_status status, + const u8 *dpcd, const u8 prot_cap[4]) +#endif + +#ifndef HAVE_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL + +bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, + struct drm_connector_state *new_state) +{ +#ifdef HAVE_DRM_CONNECTOR_STATE_HDR_OUTPUT_METADATA + struct drm_property_blob *old_blob = old_state->hdr_output_metadata; + struct drm_property_blob *new_blob = new_state->hdr_output_metadata; + + if (!old_blob || !new_blob) + return old_blob == new_blob; + + if (old_blob->length != new_blob->length) + return false; + + return !memcmp(old_blob->data, new_blob->data, old_blob->length); +#else + return false; +#endif +} +EXPORT_SYMBOL(drm_connector_atomic_hdr_metadata_equal); +#endif + +#if !defined(HAVE_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY) +int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector) +{ +#ifdef HAVE_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY + struct drm_device *dev = connector->dev; + struct drm_property *prop = dev->mode_config.hdr_output_metadata_property; + + drm_object_attach_property(&connector->base, prop, 0); +#endif + + return 0; +} +EXPORT_SYMBOL(drm_connector_attach_hdr_output_metadata_property); +#endif + +#if !defined(HAVE_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK) +int _kcl_drm_connector_set_panel_orientation_with_quirk( + struct drm_connector *connector, + enum drm_panel_orientation panel_orientation, + int width, int height) +{ + return drm_connector_init_panel_orientation_property(connector, width, height); +} +EXPORT_SYMBOL(_kcl_drm_connector_set_panel_orientation_with_quirk); +#endif + +#ifndef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY +struct drm_property *prop = NULL; +int _kcl_drm_connector_attach_colorspace_property(struct drm_connector *connector) +{ + if(prop) + drm_object_attach_property(&connector->base, prop, DRM_MODE_COLORIMETRY_DEFAULT); + + return 0; +} +EXPORT_SYMBOL(_kcl_drm_connector_attach_colorspace_property); +#endif + +#ifdef KCL_DRM_MODE_CREATE_COLORSPACE_PROPERTY +/* copy from drivers/gpu/drm/drm_connector.c (v6.1-5788-gac3470b13f0d) */ +static const char * const colorspace_names[] = { + /* For Default case, driver will set the colorspace */ + [DRM_MODE_COLORIMETRY_DEFAULT] = "Default", + /* Standard Definition Colorimetry 
based on CEA 861 */ + [DRM_MODE_COLORIMETRY_SMPTE_170M_YCC] = "SMPTE_170M_YCC", + [DRM_MODE_COLORIMETRY_BT709_YCC] = "BT709_YCC", + /* Standard Definition Colorimetry based on IEC 61966-2-4 */ + [DRM_MODE_COLORIMETRY_XVYCC_601] = "XVYCC_601", + /* High Definition Colorimetry based on IEC 61966-2-4 */ + [DRM_MODE_COLORIMETRY_XVYCC_709] = "XVYCC_709", + /* Colorimetry based on IEC 61966-2-1/Amendment 1 */ + [DRM_MODE_COLORIMETRY_SYCC_601] = "SYCC_601", + /* Colorimetry based on IEC 61966-2-5 [33] */ + [DRM_MODE_COLORIMETRY_OPYCC_601] = "opYCC_601", + /* Colorimetry based on IEC 61966-2-5 */ + [DRM_MODE_COLORIMETRY_OPRGB] = "opRGB", + /* Colorimetry based on ITU-R BT.2020 */ + [DRM_MODE_COLORIMETRY_BT2020_CYCC] = "BT2020_CYCC", + /* Colorimetry based on ITU-R BT.2020 */ + [DRM_MODE_COLORIMETRY_BT2020_RGB] = "BT2020_RGB", + /* Colorimetry based on ITU-R BT.2020 */ + [DRM_MODE_COLORIMETRY_BT2020_YCC] = "BT2020_YCC", + /* Added as part of Additional Colorimetry Extension in 861.G */ + [DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65] = "DCI-P3_RGB_D65", + [DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER] = "DCI-P3_RGB_Theater", + [DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED] = "RGB_WIDE_FIXED", + /* Colorimetry based on scRGB (IEC 61966-2-2) */ + [DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT] = "RGB_WIDE_FLOAT", + [DRM_MODE_COLORIMETRY_BT601_YCC] = "BT601_YCC", +}; + +static const u32 hdmi_colorspaces = + BIT(DRM_MODE_COLORIMETRY_SMPTE_170M_YCC) | + BIT(DRM_MODE_COLORIMETRY_BT709_YCC) | + BIT(DRM_MODE_COLORIMETRY_XVYCC_601) | + BIT(DRM_MODE_COLORIMETRY_XVYCC_709) | + BIT(DRM_MODE_COLORIMETRY_SYCC_601) | + BIT(DRM_MODE_COLORIMETRY_OPYCC_601) | + BIT(DRM_MODE_COLORIMETRY_OPRGB) | + BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | + BIT(DRM_MODE_COLORIMETRY_BT2020_RGB) | + BIT(DRM_MODE_COLORIMETRY_BT2020_YCC) | + BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65) | + BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER); + +static const u32 dp_colorspaces = + BIT(DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED) | + BIT(DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT) | + BIT(DRM_MODE_COLORIMETRY_OPRGB) | + BIT(DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65) | + BIT(DRM_MODE_COLORIMETRY_BT2020_RGB) | + BIT(DRM_MODE_COLORIMETRY_BT601_YCC) | + BIT(DRM_MODE_COLORIMETRY_BT709_YCC) | + BIT(DRM_MODE_COLORIMETRY_XVYCC_601) | + BIT(DRM_MODE_COLORIMETRY_XVYCC_709) | + BIT(DRM_MODE_COLORIMETRY_SYCC_601) | + BIT(DRM_MODE_COLORIMETRY_OPYCC_601) | + BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | + BIT(DRM_MODE_COLORIMETRY_BT2020_YCC); + +static int _kcl_drm_mode_create_colorspace_property(struct drm_connector *connector, + u32 supported_colorspaces) +{ + struct drm_device *dev = connector->dev; + u32 colorspaces = supported_colorspaces | BIT(DRM_MODE_COLORIMETRY_DEFAULT); + struct drm_prop_enum_list enum_list[DRM_MODE_COLORIMETRY_COUNT]; + int i, len; + +#ifdef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY + if (connector->colorspace_property) +#else + if (prop) +#endif + return 0; + + + if (!supported_colorspaces) { + drm_err(dev, "No supported colorspaces provded on [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); + return -EINVAL; + } + + if ((supported_colorspaces & -BIT(DRM_MODE_COLORIMETRY_COUNT)) != 0) { + drm_err(dev, "Unknown colorspace provded on [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); + return -EINVAL; + } + + len = 0; + for (i = 0; i < DRM_MODE_COLORIMETRY_COUNT; i++) { + if ((colorspaces & BIT(i)) == 0) + continue; + + enum_list[len].type = i; + enum_list[len].name = colorspace_names[i]; + len++; + } +#ifdef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY + 
connector->colorspace_property = +#else + prop = +#endif + drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, "Colorspace", + enum_list, + len); + +#ifdef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY + if (!connector->colorspace_property) +#else + if (!prop) +#endif + return -ENOMEM; + + return 0; +} +#endif /* KCL_DRM_MODE_CREATE_COLORSPACE_PROPERTY */ + +#ifndef HAVE_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY_2ARGS +int _kcl_drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector, + u32 supported_colorspaces) +{ + u32 colorspaces; + + if (supported_colorspaces) + colorspaces = supported_colorspaces & hdmi_colorspaces; + else + colorspaces = hdmi_colorspaces; + + return _kcl_drm_mode_create_colorspace_property(connector, colorspaces); +} +EXPORT_SYMBOL(_kcl_drm_mode_create_hdmi_colorspace_property); +#endif + +#ifndef HAVE_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_2ARGS +int _kcl_drm_mode_create_dp_colorspace_property(struct drm_connector *connector, + u32 supported_colorspaces) +{ + u32 colorspaces; + + if (supported_colorspaces) + colorspaces = supported_colorspaces & dp_colorspaces; + else + colorspaces = dp_colorspaces; + + return _kcl_drm_mode_create_colorspace_property(connector, colorspaces); +} +EXPORT_SYMBOL(_kcl_drm_mode_create_dp_colorspace_property); +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_crtc.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_crtc.c new file mode 100644 index 0000000000000..c4e079c49d8ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_crtc.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * Copyright (c) 2008 Red Hat Inc. + * + * DRM core CRTC related functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
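Tying the colorspace pieces above together: a connector first creates the enum property, then attaches it with DRM_MODE_COLORIMETRY_DEFAULT as the initial value. A sketch for kernels where these KCL fallbacks are in play; note that passing 0 selects the full hdmi_colorspaces set, per the wrapper above:

```c
/* Illustrative: expose the HDMI "Colorspace" property on a connector. */
static int example_attach_hdmi_colorspace(struct drm_connector *connector)
{
	int ret;

	ret = _kcl_drm_mode_create_hdmi_colorspace_property(connector, 0);
	if (ret)
		return ret;

	return _kcl_drm_connector_attach_colorspace_property(connector);
}
```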
+ * + * Authors: + * Keith Packard + * Eric Anholt + * Dave Airlie + * Jesse Barnes + */ +#include + +#ifndef HAVE_DRM_HELPER_FORCE_DISABLE_ALL +int _kcl_drm_helper_force_disable_all(struct drm_device *dev) +{ + struct drm_crtc *crtc; + int ret = 0; + + drm_modeset_lock_all(dev); + drm_for_each_crtc(crtc, dev) + if (crtc->enabled) { + struct drm_mode_set set = { + .crtc = crtc, + }; + + ret = drm_mode_set_config_internal(&set); + if (ret) + goto out; + } +out: + drm_modeset_unlock_all(dev); + return ret; +} +EXPORT_SYMBOL(_kcl_drm_helper_force_disable_all); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_dp_helper.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_dp_helper.c new file mode 100644 index 0000000000000..c27581210a3d6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_dp_helper.c @@ -0,0 +1,118 @@ +/* + * Copyright © 2009 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include +#include +#include + +#ifndef HAVE_DRM_DP_READ_DPCD_CAPS +static int _kcl_drm_dp_read_extended_dpcd_caps(struct drm_dp_aux *aux, + u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; + int ret; + struct drm_device *drm_dev = NULL; + + if (aux) { + struct drm_dp_mst_topology_mgr *mgr = + container_of(&aux, struct drm_dp_mst_topology_mgr, aux); + drm_dev = mgr->dev; + } + /* + * Prior to DP1.3 the bit represented by + * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved. + * If it is set DP_DPCD_REV at 0000h could be at a value less than + * the true capability of the panel. The only way to check is to + * then compare 0000h and 2200h. 
+ */ + if (!(dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT)) + return 0; + + ret = drm_dp_dpcd_read(aux, DP_DP13_DPCD_REV, &dpcd_ext, + sizeof(dpcd_ext)); + if (ret < 0) + return ret; + if (ret != sizeof(dpcd_ext)) + return -EIO; + + if (dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) { + drm_dbg_kms( + drm_dev, + "%s: Extended DPCD rev less than base DPCD rev (%d > %d)\n", + aux->name, dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]); + return 0; + } + + if (!memcmp(dpcd, dpcd_ext, sizeof(dpcd_ext))) + return 0; + + drm_dbg_kms(drm_dev, "%s: Base DPCD: %*ph\n", aux->name, + DP_RECEIVER_CAP_SIZE, dpcd); + + memcpy(dpcd, dpcd_ext, sizeof(dpcd_ext)); + + return 0; +} + +/** + * drm_dp_read_dpcd_caps() - read DPCD caps and extended DPCD caps if + * available + * @aux: DisplayPort AUX channel + * @dpcd: Buffer to store the resulting DPCD in + * + * Attempts to read the base DPCD caps for @aux. Additionally, this function + * checks for and reads the extended DPRX caps (%DP_DP13_DPCD_REV) if + * present. + * + * Returns: %0 if the DPCD was read successfully, negative error code + * otherwise. + */ +int _kcl_drm_dp_read_dpcd_caps(struct drm_dp_aux *aux, + u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + int ret; + struct drm_device *drm_dev = NULL; + + if (aux) { + struct drm_dp_mst_topology_mgr *mgr = + container_of(&aux, struct drm_dp_mst_topology_mgr, aux); + drm_dev = mgr->dev; + } + + ret = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, DP_RECEIVER_CAP_SIZE); + if (ret < 0) + return ret; + if (ret != DP_RECEIVER_CAP_SIZE || dpcd[DP_DPCD_REV] == 0) + return -EIO; + + ret = _kcl_drm_dp_read_extended_dpcd_caps(aux, dpcd); + if (ret < 0) + return ret; + + drm_dbg_kms(drm_dev, "%s: DPCD: %*ph\n", aux->name, + DP_RECEIVER_CAP_SIZE, dpcd); + + return ret; +} +EXPORT_SYMBOL(_kcl_drm_dp_read_dpcd_caps); +#endif \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_dsc_helper.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_dsc_helper.c new file mode 100644 index 0000000000000..8c799582dbbd7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_dsc_helper.c @@ -0,0 +1,332 @@ +/** + * drm_dsc_pps_payload_pack() - Populates the DSC PPS + * + * @pps_payload: + * Bitwise struct for DSC Picture Parameter Set. This is defined + * by &struct drm_dsc_picture_parameter_set + * @dsc_cfg: + * DSC Configuration data filled by driver as defined by + * &struct drm_dsc_config + * + * DSC source device sends a picture parameter set (PPS) containing the + * information required by the sink to decode the compressed frame. Driver + * populates the DSC PPS struct using the DSC configuration parameters in + * the order expected by the DSC Display Sink device. For the DSC, the sink + * device expects the PPS payload in big endian format for fields + * that span more than 1 byte. 
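A caller of the DPCD helper above typically reads the caps once on hotplug and derives link limits from them. A sketch; drm_dp_max_link_rate() and drm_dp_max_lane_count() are assumed to be available from the DP helper headers:

```c
/* Illustrative: fetch sink caps after hotplug and log the link limits. */
static int example_probe_dp_sink(struct drm_dp_aux *aux)
{
	u8 dpcd[DP_RECEIVER_CAP_SIZE];
	int ret;

	ret = _kcl_drm_dp_read_dpcd_caps(aux, dpcd);
	if (ret < 0)
		return ret;

	pr_debug("DP sink: max rate %d kHz, max lanes %d\n",
		 drm_dp_max_link_rate(dpcd),
		 drm_dp_max_lane_count(dpcd));
	return 0;
}
```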
+ */ + +#include +#include +#include +#include + +#ifndef HAVE_DRM_DSC_PPS_PAYLOAD_PACK +void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, + const struct drm_dsc_config *dsc_cfg) +{ + int i; + + /* Protect against someone accidentally changing struct size */ + BUILD_BUG_ON(sizeof(*pps_payload) != + DP_SDP_PPS_HEADER_PAYLOAD_BYTES_MINUS_1 + 1); + + memset(pps_payload, 0, sizeof(*pps_payload)); + + /* PPS 0 */ + pps_payload->dsc_version = + dsc_cfg->dsc_version_minor | + dsc_cfg->dsc_version_major << DSC_PPS_VERSION_MAJOR_SHIFT; + + /* PPS 1, 2 is 0 */ + + /* PPS 3 */ + pps_payload->pps_3 = + dsc_cfg->line_buf_depth | + dsc_cfg->bits_per_component << DSC_PPS_BPC_SHIFT; + + /* PPS 4 */ + pps_payload->pps_4 = + ((dsc_cfg->bits_per_pixel & DSC_PPS_BPP_HIGH_MASK) >> + DSC_PPS_MSB_SHIFT) | + dsc_cfg->vbr_enable << DSC_PPS_VBR_EN_SHIFT | +#ifdef HAVE_DRM_DSC_CONFIG_SIMPLE_422 + dsc_cfg->simple_422 << DSC_PPS_SIMPLE422_SHIFT | +#endif + dsc_cfg->convert_rgb << DSC_PPS_CONVERT_RGB_SHIFT | + dsc_cfg->block_pred_enable << DSC_PPS_BLOCK_PRED_EN_SHIFT; + + /* PPS 5 */ + pps_payload->bits_per_pixel_low = + (dsc_cfg->bits_per_pixel & DSC_PPS_LSB_MASK); + + /* + * The DSC panel expects the PPS packet to have big endian format + * for data spanning 2 bytes. Use a macro cpu_to_be16() to convert + * to big endian format. If format is little endian, it will swap + * bytes to convert to Big endian else keep it unchanged. + */ + + /* PPS 6, 7 */ + pps_payload->pic_height = cpu_to_be16(dsc_cfg->pic_height); + + /* PPS 8, 9 */ + pps_payload->pic_width = cpu_to_be16(dsc_cfg->pic_width); + + /* PPS 10, 11 */ + pps_payload->slice_height = cpu_to_be16(dsc_cfg->slice_height); + + /* PPS 12, 13 */ + pps_payload->slice_width = cpu_to_be16(dsc_cfg->slice_width); + + /* PPS 14, 15 */ + pps_payload->chunk_size = cpu_to_be16(dsc_cfg->slice_chunk_size); + + /* PPS 16 */ + pps_payload->initial_xmit_delay_high = + ((dsc_cfg->initial_xmit_delay & + DSC_PPS_INIT_XMIT_DELAY_HIGH_MASK) >> + DSC_PPS_MSB_SHIFT); + + /* PPS 17 */ + pps_payload->initial_xmit_delay_low = + (dsc_cfg->initial_xmit_delay & DSC_PPS_LSB_MASK); + + /* PPS 18, 19 */ + pps_payload->initial_dec_delay = + cpu_to_be16(dsc_cfg->initial_dec_delay); + + /* PPS 20 is 0 */ + + /* PPS 21 */ + pps_payload->initial_scale_value = + dsc_cfg->initial_scale_value; + + /* PPS 22, 23 */ + pps_payload->scale_increment_interval = + cpu_to_be16(dsc_cfg->scale_increment_interval); + + /* PPS 24 */ + pps_payload->scale_decrement_interval_high = + ((dsc_cfg->scale_decrement_interval & + DSC_PPS_SCALE_DEC_INT_HIGH_MASK) >> + DSC_PPS_MSB_SHIFT); + + /* PPS 25 */ + pps_payload->scale_decrement_interval_low = + (dsc_cfg->scale_decrement_interval & DSC_PPS_LSB_MASK); + + /* PPS 26[7:0], PPS 27[7:5] RESERVED */ + + /* PPS 27 */ + pps_payload->first_line_bpg_offset = + dsc_cfg->first_line_bpg_offset; + + /* PPS 28, 29 */ + pps_payload->nfl_bpg_offset = + cpu_to_be16(dsc_cfg->nfl_bpg_offset); + + /* PPS 30, 31 */ + pps_payload->slice_bpg_offset = + cpu_to_be16(dsc_cfg->slice_bpg_offset); + + /* PPS 32, 33 */ + pps_payload->initial_offset = + cpu_to_be16(dsc_cfg->initial_offset); + + /* PPS 34, 35 */ + pps_payload->final_offset = cpu_to_be16(dsc_cfg->final_offset); + + /* PPS 36 */ + pps_payload->flatness_min_qp = dsc_cfg->flatness_min_qp; + + /* PPS 37 */ + pps_payload->flatness_max_qp = dsc_cfg->flatness_max_qp; + + /* PPS 38, 39 */ + pps_payload->rc_model_size = cpu_to_be16(dsc_cfg->rc_model_size); + + /* PPS 40 */ + pps_payload->rc_edge_factor = 
DSC_RC_EDGE_FACTOR_CONST; + + /* PPS 41 */ + pps_payload->rc_quant_incr_limit0 = + dsc_cfg->rc_quant_incr_limit0; + + /* PPS 42 */ + pps_payload->rc_quant_incr_limit1 = + dsc_cfg->rc_quant_incr_limit1; + + /* PPS 43 */ + pps_payload->rc_tgt_offset = DSC_RC_TGT_OFFSET_LO_CONST | + DSC_RC_TGT_OFFSET_HI_CONST << DSC_PPS_RC_TGT_OFFSET_HI_SHIFT; + + /* PPS 44 - 57 */ + for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++) + pps_payload->rc_buf_thresh[i] = + dsc_cfg->rc_buf_thresh[i]; + + /* PPS 58 - 87 */ + /* + * For DSC sink programming the RC Range parameter fields + * are as follows: Min_qp[15:11], max_qp[10:6], offset[5:0] + */ + for (i = 0; i < DSC_NUM_BUF_RANGES; i++) { + pps_payload->rc_range_parameters[i] = + cpu_to_be16((dsc_cfg->rc_range_params[i].range_min_qp << + DSC_PPS_RC_RANGE_MINQP_SHIFT) | + (dsc_cfg->rc_range_params[i].range_max_qp << + DSC_PPS_RC_RANGE_MAXQP_SHIFT) | + (dsc_cfg->rc_range_params[i].range_bpg_offset)); + } + + /* PPS 88 */ + pps_payload->native_422_420 = dsc_cfg->native_422 | + dsc_cfg->native_420 << DSC_PPS_NATIVE_420_SHIFT; + + /* PPS 89 */ + pps_payload->second_line_bpg_offset = + dsc_cfg->second_line_bpg_offset; + + /* PPS 90, 91 */ + pps_payload->nsl_bpg_offset = + cpu_to_be16(dsc_cfg->nsl_bpg_offset); + + /* PPS 92, 93 */ + pps_payload->second_line_offset_adj = + cpu_to_be16(dsc_cfg->second_line_offset_adj); + + /* PPS 94 - 127 are O */ +} +EXPORT_SYMBOL(drm_dsc_pps_payload_pack); +#endif /* HAVE_DRM_DSC_PPS_PAYLOAD_PACK */ + +#ifndef HAVE_DRM_DSC_COMPUTE_RC_PARAMETERS +int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg) +{ + unsigned long groups_per_line = 0; + unsigned long groups_total = 0; + unsigned long num_extra_mux_bits = 0; + unsigned long slice_bits = 0; + unsigned long hrd_delay = 0; + unsigned long final_scale = 0; + unsigned long rbs_min = 0; + + if (vdsc_cfg->native_420 || vdsc_cfg->native_422) { + /* Number of groups used to code each line of a slice */ + groups_per_line = DIV_ROUND_UP(vdsc_cfg->slice_width / 2, + DSC_RC_PIXELS_PER_GROUP); + + /* chunksize in Bytes */ + vdsc_cfg->slice_chunk_size = DIV_ROUND_UP(vdsc_cfg->slice_width / 2 * + vdsc_cfg->bits_per_pixel, + (8 * 16)); + } else { + /* Number of groups used to code each line of a slice */ + groups_per_line = DIV_ROUND_UP(vdsc_cfg->slice_width, + DSC_RC_PIXELS_PER_GROUP); + + /* chunksize in Bytes */ + vdsc_cfg->slice_chunk_size = DIV_ROUND_UP(vdsc_cfg->slice_width * + vdsc_cfg->bits_per_pixel, + (8 * 16)); + } + + if (vdsc_cfg->convert_rgb) + num_extra_mux_bits = 3 * (vdsc_cfg->mux_word_size + + (4 * vdsc_cfg->bits_per_component + 4) + - 2); + else if (vdsc_cfg->native_422) + num_extra_mux_bits = 4 * vdsc_cfg->mux_word_size + + (4 * vdsc_cfg->bits_per_component + 4) + + 3 * (4 * vdsc_cfg->bits_per_component) - 2; + else + num_extra_mux_bits = 3 * vdsc_cfg->mux_word_size + + (4 * vdsc_cfg->bits_per_component + 4) + + 2 * (4 * vdsc_cfg->bits_per_component) - 2; + /* Number of bits in one Slice */ + slice_bits = 8 * vdsc_cfg->slice_chunk_size * vdsc_cfg->slice_height; + + while ((num_extra_mux_bits > 0) && + ((slice_bits - num_extra_mux_bits) % vdsc_cfg->mux_word_size)) + num_extra_mux_bits--; + + if (groups_per_line < vdsc_cfg->initial_scale_value - 8) + vdsc_cfg->initial_scale_value = groups_per_line + 8; + + /* scale_decrement_interval calculation according to DSC spec 1.11 */ + if (vdsc_cfg->initial_scale_value > 8) + vdsc_cfg->scale_decrement_interval = groups_per_line / + (vdsc_cfg->initial_scale_value - 8); + else + vdsc_cfg->scale_decrement_interval = 
DSC_SCALE_DECREMENT_INTERVAL_MAX; + + vdsc_cfg->final_offset = vdsc_cfg->rc_model_size - + (vdsc_cfg->initial_xmit_delay * + vdsc_cfg->bits_per_pixel + 8) / 16 + num_extra_mux_bits; + + if (vdsc_cfg->final_offset >= vdsc_cfg->rc_model_size) { + DRM_DEBUG_KMS("FinalOfs < RcModelSze for this InitialXmitDelay\n"); + return -ERANGE; + } + + final_scale = (vdsc_cfg->rc_model_size * 8) / + (vdsc_cfg->rc_model_size - vdsc_cfg->final_offset); + if (vdsc_cfg->slice_height > 1) + /* + * NflBpgOffset is 16 bit value with 11 fractional bits + * hence we multiply by 2^11 for preserving the + * fractional part + */ + vdsc_cfg->nfl_bpg_offset = DIV_ROUND_UP((vdsc_cfg->first_line_bpg_offset << 11), + (vdsc_cfg->slice_height - 1)); + else + vdsc_cfg->nfl_bpg_offset = 0; + + /* Number of groups used to code the entire slice */ + groups_total = groups_per_line * vdsc_cfg->slice_height; + + /* slice_bpg_offset is 16 bit value with 11 fractional bits */ + vdsc_cfg->slice_bpg_offset = DIV_ROUND_UP(((vdsc_cfg->rc_model_size - + vdsc_cfg->initial_offset + + num_extra_mux_bits) << 11), + groups_total); + + if (final_scale > 9) { + /* + * ScaleIncrementInterval = + * finaloffset/((NflBpgOffset + SliceBpgOffset)*8(finalscale - 1.125)) + * as (NflBpgOffset + SliceBpgOffset) has 11 bit fractional value, + * we need divide by 2^11 from pstDscCfg values + */ + vdsc_cfg->scale_increment_interval = + (vdsc_cfg->final_offset * (1 << 11)) / + ((vdsc_cfg->nfl_bpg_offset + + vdsc_cfg->slice_bpg_offset) * + (final_scale - 9)); + } else { + /* + * If finalScaleValue is less than or equal to 9, a value of 0 should + * be used to disable the scale increment at the end of the slice + */ + vdsc_cfg->scale_increment_interval = 0; + } + + /* + * DSC spec mentions that bits_per_pixel specifies the target + * bits/pixel (bpp) rate that is used by the encoder, + * in steps of 1/16 of a bit per pixel + */ + rbs_min = vdsc_cfg->rc_model_size - vdsc_cfg->initial_offset + + DIV_ROUND_UP(vdsc_cfg->initial_xmit_delay * + vdsc_cfg->bits_per_pixel, 16) + + groups_per_line * vdsc_cfg->first_line_bpg_offset; + + hrd_delay = DIV_ROUND_UP((rbs_min * 16), vdsc_cfg->bits_per_pixel); + vdsc_cfg->rc_bits = (hrd_delay * vdsc_cfg->bits_per_pixel) / 16; + vdsc_cfg->initial_dec_delay = hrd_delay - vdsc_cfg->initial_xmit_delay; + + return 0; +} +EXPORT_SYMBOL(drm_dsc_compute_rc_parameters); +#endif /* HAVE_DRM_DSC_COMPUTE_RC_PARAMETERS */ \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_edid.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_edid.c new file mode 100644 index 0000000000000..c5272121a0ab4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_edid.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Intel Corporation. 
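For orientation, the two DSC helpers above are normally used back to back: the driver fills a struct drm_dsc_config, lets drm_dsc_compute_rc_parameters() derive the rate-control values, then packs the PPS for the sink. A sketch; @cfg is assumed to be pre-filled with validated picture/slice geometry and bpp:

```c
/* Illustrative: derive RC parameters, then pack the PPS SDP payload. */
static int example_build_dsc_pps(struct drm_dsc_config *cfg,
				 struct drm_dsc_picture_parameter_set *pps)
{
	int ret;

	ret = drm_dsc_compute_rc_parameters(cfg);
	if (ret)
		return ret;	/* e.g. -ERANGE for a bad initial_xmit_delay */

	drm_dsc_pps_payload_pack(pps, cfg);
	return 0;
}
```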
+ * + * Authors: + * Ramalingam C + */ +#include +#include + +#ifndef HAVE_DRM_EDID_MALLOC +static const struct drm_edid *__kcl_drm_edid_alloc(const void *edid, size_t size) +{ + struct drm_edid *drm_edid; + + if (!edid || !size || size < EDID_LENGTH) + return NULL; + + drm_edid = kzalloc(sizeof(*drm_edid), GFP_KERNEL); + if (drm_edid) { + drm_edid->edid = edid; + drm_edid->size = size; + } + + return drm_edid; +} + +const struct drm_edid *_kcl_drm_edid_alloc(const void *edid, size_t size) +{ + const struct drm_edid *drm_edid; + + if (!edid || !size || size < EDID_LENGTH) + return NULL; + + edid = kmemdup(edid, size, GFP_KERNEL); + if (!edid) + return NULL; + + drm_edid = __kcl_drm_edid_alloc(edid, size); + if (!drm_edid) + kfree(edid); + + return drm_edid; +} +EXPORT_SYMBOL(_kcl_drm_edid_alloc); + +void _kcl_drm_edid_free(const struct drm_edid *drm_edid) +{ + if (!drm_edid) + return; + + kfree(drm_edid->edid); + kfree(drm_edid); +} +EXPORT_SYMBOL(_kcl_drm_edid_free); +#endif + +#ifndef HAVE_DRM_EDID_RAW +static int edid_extension_block_count(const struct edid *edid) +{ + return edid->extensions; +} + +static int edid_block_count(const struct edid *edid) +{ + return edid_extension_block_count(edid) + 1; +} + +static int edid_size_by_blocks(int num_blocks) +{ + return num_blocks * EDID_LENGTH; +} + +static int edid_size(const struct edid *edid) +{ + return edid_size_by_blocks(edid_block_count(edid)); +} + +const struct edid *_kcl_drm_edid_raw(const struct drm_edid *drm_edid) +{ + if (!drm_edid || !drm_edid->size) + return NULL; + + /* + * Do not return pointers where relying on EDID extension count would + * lead to buffer overflow. + */ + if (WARN_ON(edid_size(drm_edid->edid) > drm_edid->size)) + return NULL; + + return drm_edid->edid; +} +EXPORT_SYMBOL(_kcl_drm_edid_raw); +#endif + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_exec.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_exec.c new file mode 100644 index 0000000000000..1ce1651265c24 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_exec.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +#include +#include +#include +#include +#include + +#ifndef HAVE_DRM_DRM_EXEC_H +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. + * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + * drm_exec_until_all_locked(&exec) { + * ret = drm_exec_prepare_obj(&exec, boA, 1); + * drm_exec_retry_on_contention(&exec); + * if (ret) + * goto error; + * + * ret = drm_exec_prepare_obj(&exec, boB, 1); + * drm_exec_retry_on_contention(&exec); + * if (ret) + * goto error; + * } + * + * drm_exec_for_each_locked_object(&exec, index, obj) { + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); + * ... + * } + * drm_exec_fini(&exec); + * + * See struct dma_exec for more details. 
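Stepping back to the EDID helpers defined above, the alloc/raw/free lifecycle looks like this. A sketch; the raw blob source is hypothetical:

```c
/* Illustrative: wrap a raw EDID blob, hand legacy code the struct edid,
 * then release the copy. */
static void example_edid_roundtrip(const void *blob, size_t size)
{
	const struct drm_edid *drm_edid;
	const struct edid *raw;

	drm_edid = _kcl_drm_edid_alloc(blob, size);	/* kmemdups the blob */
	if (!drm_edid)
		return;

	raw = _kcl_drm_edid_raw(drm_edid);	/* NULL on size mismatch */
	if (raw)
		pr_debug("EDID has %d extension block(s)\n", raw->extensions);

	_kcl_drm_edid_free(drm_edid);
}
```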
+ */ + +/* Dummy value used to initially enter the retry loop */ +#define DRM_EXEC_DUMMY ((void *)~0) + +/* Unlock all objects and drop references */ +static void drm_exec_unlock_all(struct drm_exec *exec) +{ + struct drm_gem_object *obj; + unsigned long index; + + drm_exec_for_each_locked_object(exec, index, obj) { + dma_resv_unlock(amdkcl_gem_resvp(obj)); + drm_gem_object_put(obj); + } + + drm_gem_object_put(exec->prelocked); + exec->prelocked = NULL; +} + +/** + * drm_exec_init - initialize a drm_exec object + * @exec: the drm_exec object to initialize + * @flags: controls locking behavior, see DRM_EXEC_* defines + * + * Initialize the object and make sure that we can track locked objects. + */ +void drm_exec_init(struct drm_exec *exec, uint32_t flags) +{ + exec->flags = flags; + exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); + + /* If allocation here fails, just delay that till the first use */ + exec->max_objects = exec->objects ? PAGE_SIZE / sizeof(void *) : 0; + exec->num_objects = 0; + exec->contended = DRM_EXEC_DUMMY; + exec->prelocked = NULL; +} +EXPORT_SYMBOL(drm_exec_init); + +/** + * drm_exec_fini - finalize a drm_exec object + * @exec: the drm_exec object to finalize + * + * Unlock all locked objects, drop the references to objects and free all memory + * used for tracking the state. + */ +void drm_exec_fini(struct drm_exec *exec) +{ + drm_exec_unlock_all(exec); + kvfree(exec->objects); + if (exec->contended != DRM_EXEC_DUMMY) { + drm_gem_object_put(exec->contended); + ww_acquire_fini(&exec->ticket); + } +} +EXPORT_SYMBOL(drm_exec_fini); + +/** + * drm_exec_cleanup - cleanup when contention is detected + * @exec: the drm_exec object to cleanup + * + * Cleanup the current state and return true if we should stay inside the retry + * loop, false if there wasn't any contention detected and we can keep the + * objects locked. 
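drm_exec_cleanup() is the engine behind the drm_exec_until_all_locked() loop shown in the overview above; written out by hand, the retry pattern looks roughly like this (a sketch for a single BO, with illustrative naming):

```c
/* Illustrative: the retry loop that drm_exec_until_all_locked() wraps,
 * written out by hand for a single BO. */
static int example_lock_one(struct drm_exec *exec, struct drm_gem_object *bo)
{
	int ret;

	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	while (drm_exec_cleanup(exec)) {
		ret = drm_exec_prepare_obj(exec, bo, 1);
		if (ret == -EDEADLK)
			continue;	/* contended: drop all locks, retry */
		if (ret) {
			drm_exec_fini(exec);
			return ret;
		}
	}
	return 0;	/* caller runs drm_exec_fini() once done with the locks */
}
```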
+ */ +bool drm_exec_cleanup(struct drm_exec *exec) +{ + if (likely(!exec->contended)) { + ww_acquire_done(&exec->ticket); + return false; + } + + if (likely(exec->contended == DRM_EXEC_DUMMY)) { + exec->contended = NULL; + ww_acquire_init(&exec->ticket, &reservation_ww_class); + return true; + } + + drm_exec_unlock_all(exec); + exec->num_objects = 0; + return true; +} +EXPORT_SYMBOL(drm_exec_cleanup); + +/* Track the locked object in the array */ +static int drm_exec_obj_locked(struct drm_exec *exec, + struct drm_gem_object *obj) +{ + if (unlikely(exec->num_objects == exec->max_objects)) { + size_t size = exec->max_objects * sizeof(void *); + void *tmp; + + tmp = kvrealloc(exec->objects, size, size + PAGE_SIZE, + GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + exec->objects = tmp; + exec->max_objects += PAGE_SIZE / sizeof(void *); + } + drm_gem_object_get(obj); + exec->objects[exec->num_objects++] = obj; + + return 0; +} + +/* Make sure the contended object is locked first */ +static int drm_exec_lock_contended(struct drm_exec *exec) +{ + struct drm_gem_object *obj = exec->contended; + int ret; + + if (likely(!obj)) + return 0; + + /* Always cleanup the contention so that error handling can kick in */ + exec->contended = NULL; + if (exec->flags & DRM_EXEC_INTERRUPTIBLE_WAIT) { + ret = dma_resv_lock_slow_interruptible(amdkcl_gem_resvp(obj), + &exec->ticket); + if (unlikely(ret)) + goto error_dropref; + } else { + dma_resv_lock_slow(amdkcl_gem_resvp(obj), &exec->ticket); + } + + ret = drm_exec_obj_locked(exec, obj); + if (unlikely(ret)) + goto error_unlock; + + exec->prelocked = obj; + return 0; + +error_unlock: + dma_resv_unlock(amdkcl_gem_resvp(obj)); + +error_dropref: + drm_gem_object_put(obj); + return ret; +} + +/** + * drm_exec_lock_obj - lock a GEM object for use + * @exec: the drm_exec object with the state + * @obj: the GEM object to lock + * + * Lock a GEM object for use and grab a reference to it. + * + * Returns: -EDEADLK if a contention is detected, -EALREADY when object is + * already locked (can be suppressed by setting the DRM_EXEC_IGNORE_DUPLICATES + * flag), -ENOMEM when memory allocation failed and zero for success. + */ +int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj) +{ + int ret; + + ret = drm_exec_lock_contended(exec); + if (unlikely(ret)) + return ret; + + if (exec->prelocked == obj) { + drm_gem_object_put(exec->prelocked); + exec->prelocked = NULL; + return 0; + } + + if (exec->flags & DRM_EXEC_INTERRUPTIBLE_WAIT) + ret = dma_resv_lock_interruptible(amdkcl_gem_resvp(obj), &exec->ticket); + else + ret = dma_resv_lock(amdkcl_gem_resvp(obj), &exec->ticket); + + if (unlikely(ret == -EDEADLK)) { + drm_gem_object_get(obj); + exec->contended = obj; + return -EDEADLK; + } + + if (unlikely(ret == -EALREADY) && + exec->flags & DRM_EXEC_IGNORE_DUPLICATES) + return 0; + + if (unlikely(ret)) + return ret; + + ret = drm_exec_obj_locked(exec, obj); + if (ret) + goto error_unlock; + + return 0; + +error_unlock: + dma_resv_unlock(amdkcl_gem_resvp(obj)); + return ret; +} +EXPORT_SYMBOL(drm_exec_lock_obj); + +/** + * drm_exec_unlock_obj - unlock a GEM object in this exec context + * @exec: the drm_exec object with the state + * @obj: the GEM object to unlock + * + * Unlock the GEM object and remove it from the collection of locked objects. + * Should only be used to unlock the most recently locked objects. It's not time + * efficient to unlock objects locked long ago. 
+ */ +void drm_exec_unlock_obj(struct drm_exec *exec, struct drm_gem_object *obj) +{ + unsigned int i; + + for (i = exec->num_objects; i--;) { + if (exec->objects[i] == obj) { + dma_resv_unlock(amdkcl_gem_resvp(obj)); + for (++i; i < exec->num_objects; ++i) + exec->objects[i - 1] = exec->objects[i]; + --exec->num_objects; + drm_gem_object_put(obj); + return; + } + + } +} +EXPORT_SYMBOL(drm_exec_unlock_obj); + +/** + * drm_exec_prepare_obj - prepare a GEM object for use + * @exec: the drm_exec object with the state + * @obj: the GEM object to prepare + * @num_fences: how many fences to reserve + * + * Prepare a GEM object for use by locking it and reserving fence slots. + * + * Returns: -EDEADLK if a contention is detected, -EALREADY when object is + * already locked, -ENOMEM when memory allocation failed and zero for success. + */ +int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, + unsigned int num_fences) +{ + int ret; + + ret = drm_exec_lock_obj(exec, obj); + if (ret) + return ret; + + ret = dma_resv_reserve_fences(amdkcl_gem_resvp(obj), num_fences); + if (ret) { + drm_exec_unlock_obj(exec, obj); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(drm_exec_prepare_obj); + +/** + * drm_exec_prepare_array - helper to prepare an array of objects + * @exec: the drm_exec object with the state + * @objects: array of GEM object to prepare + * @num_objects: number of GEM objects in the array + * @num_fences: number of fences to reserve on each GEM object + * + * Prepares all GEM objects in an array, aborts on first error. + * Reserves @num_fences on each GEM object after locking it. + * + * Returns: -EDEADLOCK on contention, -EALREADY when object is already locked, + * -ENOMEM when memory allocation failed and zero for success. + */ +int drm_exec_prepare_array(struct drm_exec *exec, + struct drm_gem_object **objects, + unsigned int num_objects, + unsigned int num_fences) +{ + int ret; + + for (unsigned int i = 0; i < num_objects; ++i) { + ret = drm_exec_prepare_obj(exec, objects[i], num_fences); + if (unlikely(ret)) + return ret; + } + + return 0; +} +EXPORT_SYMBOL(drm_exec_prepare_array); + +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_fb.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_fb.c new file mode 100644 index 0000000000000..11e5390896f68 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_fb.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
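drm_exec_prepare_array() above drops into the same retry loop as the single-object case; a sketch that locks an array of BOs and reserves one fence slot on each:

```c
/* Illustrative: lock @count objects, reserving one fence slot apiece. */
static int example_lock_array(struct drm_exec *exec,
			      struct drm_gem_object **objs,
			      unsigned int count)
{
	int ret;

	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	while (drm_exec_cleanup(exec)) {
		ret = drm_exec_prepare_array(exec, objs, count, 1);
		if (ret == -EDEADLK)
			continue;
		if (ret) {
			drm_exec_fini(exec);
			return ret;
		}
	}
	return 0;
}
```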
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#include +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_DRM_FB_HELPER_FILL_INFO +void drm_fb_helper_fill_info(struct fb_info *info, + struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_framebuffer *fb = fb_helper->fb; + + drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); + drm_fb_helper_fill_var(info, fb_helper, + sizes->fb_width, sizes->fb_height); + + info->par = fb_helper; + snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", + fb_helper->dev->driver->name); + +} +EXPORT_SYMBOL(drm_fb_helper_fill_info); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_hdcp.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_hdcp.c new file mode 100644 index 0000000000000..21686ff9a5950 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_hdcp.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Intel Corporation. + * + * Authors: + * Ramalingam C + */ +#include + +#ifndef HAVE_DRM_HDCP_UPDATE_CONTENT_PROTECTION +/* Copied from v5.3-rc1-380-gbb5a45d40d50 drivers/gpu/drm/drm_hdcp.c */ +void _kcl_drm_hdcp_update_content_protection(struct drm_connector *connector, + u64 val) +{ + struct drm_device *dev = connector->dev; + struct drm_connector_state *state = connector->state; + + WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + if (state->content_protection == val) + return; + + state->content_protection = val; +} +EXPORT_SYMBOL(_kcl_drm_hdcp_update_content_protection); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_modes.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_modes.c new file mode 100644 index 0000000000000..a7963c347e685 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_modes.c @@ -0,0 +1,46 @@ +/* + * Copyright © 1997-2003 by The XFree86 Project, Inc. + * Copyright © 2007 Dave Airlie + * Copyright © 2007-2008 Intel Corporation + * Jesse Barnes + * Copyright 2005-2006 Luc Verhaegen + * Copyright (c) 2001, Andy Ritger aritger@nvidia.com + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
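The fill_info backport above slots into the tail of a typical fbdev probe path, after the driver has allocated its fb_info and attached a framebuffer to the helper. A minimal sketch:

```c
/* Illustrative: final fb_info setup inside a driver's fb_probe hook;
 * fills fix/var from helper->fb and points info->par at the helper. */
static int example_fb_probe_tail(struct drm_fb_helper *helper,
				 struct drm_fb_helper_surface_size *sizes,
				 struct fb_info *info)
{
	drm_fb_helper_fill_info(info, helper, sizes);
	return 0;
}
```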
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of the copyright holder(s) + * and author(s) shall not be used in advertising or otherwise to promote + * the sale, use or other dealings in this Software without prior written + * authorization from the copyright holder(s) and author(s). + */ +#include +#include +#include + +#ifndef HAVE_DRM_MODE_INIT +void drm_mode_init(struct drm_display_mode *dst, const struct drm_display_mode *src) +{ + struct list_head head = dst->head; + + memset(dst, 0, sizeof(*dst)); + *dst = *src; + dst->head = head; +} +EXPORT_SYMBOL(drm_mode_init); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_prime.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_prime.c new file mode 100644 index 0000000000000..36ca9dec40c2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_prime.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * driver/drm/drm_prime.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#include + +#ifndef HAVE_DRM_GEM_PRIME_HANDLE_TO_FD +int (*_kcl_drm_gem_prime_handle_to_fd)(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd); +EXPORT_SYMBOL(_kcl_drm_gem_prime_handle_to_fd); + +int (*_kcl_drm_gem_prime_fd_to_handle)(struct drm_device *dev, + struct drm_file *file_priv, int prime_fd, + uint32_t *handle); +EXPORT_SYMBOL(_kcl_drm_gem_prime_fd_to_handle); +#endif + +void amdkcl_prime_init(void) +{ +#ifndef HAVE_DRM_GEM_PRIME_HANDLE_TO_FD + _kcl_drm_gem_prime_handle_to_fd = amdkcl_fp_setup("drm_gem_prime_handle_to_fd", NULL); + _kcl_drm_gem_prime_fd_to_handle = amdkcl_fp_setup("drm_gem_prime_fd_to_handle", NULL); +#endif +} diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_print.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_print.c new file mode 100644 index 0000000000000..68e4abe6470c6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_print.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark + */ +#include +#include +#include + +#if !defined(HAVE_DRM_ERR_MACRO) +void kcl_drm_err(const char *format, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, format); + vaf.fmt = format; + vaf.va = &args; + + printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV", + __builtin_return_address(0), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(kcl_drm_err); + +#endif + +#ifndef HAVE_DRM_PRINT_BITS +/* Copied from v5.3-rc1-684-g141f6357f45c drivers/gpu/drm/drm_print.c */ +void drm_print_bits(struct drm_printer *p, unsigned long value, + const char * const bits[], unsigned int nbits) +{ + bool first = true; + unsigned int i; + + if (WARN_ON_ONCE(nbits > BITS_PER_TYPE(value))) + nbits = BITS_PER_TYPE(value); + + for_each_set_bit(i, &value, nbits) { + if (WARN_ON_ONCE(!bits[i])) + continue; + drm_printf(p, "%s%s", first ? "" : ",", + bits[i]); + first = false; + } + if (first) + drm_printf(p, "(none)"); +} +EXPORT_SYMBOL(drm_print_bits); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_simple_kms_helper.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_simple_kms_helper.c new file mode 100644 index 0000000000000..7a44428ce88e2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_simple_kms_helper.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016 Noralf Trønnes + */ + +#include + +/* Copied from drivers/gpu/drm/drm_simple_kms_helper.c and modified for KCL */ +#ifndef HAVE_DRM_SIMPLE_ENCODER_INIT +static const struct drm_encoder_funcs drm_simple_encoder_funcs_cleanup = { + .destroy = drm_encoder_cleanup, +}; + +int _kcl_drm_simple_encoder_init(struct drm_device *dev, + struct drm_encoder *encoder, + int encoder_type) +{ + return drm_encoder_init(dev, encoder, + &drm_simple_encoder_funcs_cleanup, + encoder_type, NULL); +} +EXPORT_SYMBOL(_kcl_drm_simple_encoder_init); + +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_suballoc.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_suballoc.c new file mode 100644 index 0000000000000..8ad6e3d9b60eb --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_suballoc.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2011 Red Hat Inc. + * Copyright 2023 Intel Corporation. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* Algorithm: + * + * We store the last allocated bo in "hole", we always try to allocate + * after the last allocated bo. 
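drm_print_bits() above is handy in debugfs dumps for decoding flag words; a sketch with made-up flag names:

```c
/* Illustrative: print a flags word as comma-separated bit names,
 * or "(none)" when no bit is set. */
static void example_dump_flags(struct drm_printer *p, unsigned long flags)
{
	static const char * const names[] = { "bound", "pinned", "evicted" };

	drm_print_bits(p, flags, names, ARRAY_SIZE(names));
	drm_printf(p, "\n");
}
```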
Principle is that in a linear GPU ring + * progression was is after last is the oldest bo we allocated and thus + * the first one that should no longer be in use by the GPU. + * + * If it's not the case we skip over the bo after last to the closest + * done bo if such one exist. If none exist and we are not asked to + * block we report failure to allocate. + * + * If we are asked to block we wait on all the oldest fence of all + * rings. We just wait for any of those fence to complete. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_DRM_SUBALLOC_MANAGER_INIT +static void drm_suballoc_remove_locked(struct drm_suballoc *sa); +static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager); + +/** + * drm_suballoc_manager_init() - Initialise the drm_suballoc_manager + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to suballocate + * @align: alignment for each suballocated chunk + * + * Prepares the suballocation manager for suballocations. + */ +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager, + size_t size, size_t align) +{ + unsigned int i; + + BUILD_BUG_ON(!is_power_of_2(DRM_SUBALLOC_MAX_QUEUES)); + + if (!align) + align = 1; + + /* alignment must be a power of 2 */ + if (WARN_ON_ONCE(align & (align - 1))) + align = roundup_pow_of_two(align); + + init_waitqueue_head(&sa_manager->wq); + sa_manager->size = size; + sa_manager->align = align; + sa_manager->hole = &sa_manager->olist; + INIT_LIST_HEAD(&sa_manager->olist); + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) + INIT_LIST_HEAD(&sa_manager->flist[i]); +} +EXPORT_SYMBOL(drm_suballoc_manager_init); + +/** + * drm_suballoc_manager_fini() - Destroy the drm_suballoc_manager + * @sa_manager: pointer to the sa_manager + * + * Cleans up the suballocation manager after use. All fences added + * with drm_suballoc_free() must be signaled, or we cannot clean up + * the entire manager. 
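The manager is typically embedded in a device structure, initialised once as above and torn down with the fini routine below; a sketch of that lifecycle (the 512 KiB pool and 256-byte alignment are arbitrary; SZ_512K comes from <linux/sizes.h>):

```c
/* Illustrative: carve a 512 KiB GPU buffer into 256-byte-aligned
 * suballocations for the life of the device. */
static void example_sa_lifecycle(struct drm_suballoc_manager *mgr)
{
	drm_suballoc_manager_init(mgr, SZ_512K, 256);

	/* ... drm_suballoc_new() / drm_suballoc_free() traffic ... */

	/* Every freeing fence must have signaled by this point. */
	drm_suballoc_manager_fini(mgr);
}
```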
+ */ +void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager) +{ + struct drm_suballoc *sa, *tmp; + + if (!sa_manager->size) + return; + + if (!list_empty(&sa_manager->olist)) { + sa_manager->hole = &sa_manager->olist; + drm_suballoc_try_free(sa_manager); + if (!list_empty(&sa_manager->olist)) + DRM_ERROR("sa_manager is not empty, clearing anyway\n"); + } + list_for_each_entry_safe(sa, tmp, &sa_manager->olist, olist) { + drm_suballoc_remove_locked(sa); + } + + sa_manager->size = 0; +} +EXPORT_SYMBOL(drm_suballoc_manager_fini); + +static void drm_suballoc_remove_locked(struct drm_suballoc *sa) +{ + struct drm_suballoc_manager *sa_manager = sa->manager; + + if (sa_manager->hole == &sa->olist) + sa_manager->hole = sa->olist.prev; + + list_del_init(&sa->olist); + list_del_init(&sa->flist); + dma_fence_put(sa->fence); + kfree(sa); +} + +static void drm_suballoc_try_free(struct drm_suballoc_manager *sa_manager) +{ + struct drm_suballoc *sa, *tmp; + + if (sa_manager->hole->next == &sa_manager->olist) + return; + + sa = list_entry(sa_manager->hole->next, struct drm_suballoc, olist); + list_for_each_entry_safe_from(sa, tmp, &sa_manager->olist, olist) { + if (!sa->fence || !dma_fence_is_signaled(sa->fence)) + return; + + drm_suballoc_remove_locked(sa); + } +} + +static size_t drm_suballoc_hole_soffset(struct drm_suballoc_manager *sa_manager) +{ + struct list_head *hole = sa_manager->hole; + + if (hole != &sa_manager->olist) + return list_entry(hole, struct drm_suballoc, olist)->eoffset; + + return 0; +} + +static size_t drm_suballoc_hole_eoffset(struct drm_suballoc_manager *sa_manager) +{ + struct list_head *hole = sa_manager->hole; + + if (hole->next != &sa_manager->olist) + return list_entry(hole->next, struct drm_suballoc, olist)->soffset; + return sa_manager->size; +} + +static bool drm_suballoc_try_alloc(struct drm_suballoc_manager *sa_manager, + struct drm_suballoc *sa, + size_t size, size_t align) +{ + size_t soffset, eoffset, wasted; + + soffset = drm_suballoc_hole_soffset(sa_manager); + eoffset = drm_suballoc_hole_eoffset(sa_manager); + wasted = round_up(soffset, align) - soffset; + + if ((eoffset - soffset) >= (size + wasted)) { + soffset += wasted; + + sa->manager = sa_manager; + sa->soffset = soffset; + sa->eoffset = soffset + size; + list_add(&sa->olist, sa_manager->hole); + INIT_LIST_HEAD(&sa->flist); + sa_manager->hole = &sa->olist; + return true; + } + return false; +} + +static bool __drm_suballoc_event(struct drm_suballoc_manager *sa_manager, + size_t size, size_t align) +{ + size_t soffset, eoffset, wasted; + unsigned int i; + + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) + if (!list_empty(&sa_manager->flist[i])) + return true; + + soffset = drm_suballoc_hole_soffset(sa_manager); + eoffset = drm_suballoc_hole_eoffset(sa_manager); + wasted = round_up(soffset, align) - soffset; + + return ((eoffset - soffset) >= (size + wasted)); +} + +/** + * drm_suballoc_event() - Check if we can stop waiting + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to allocate + * @align: alignment we need to match + * + * Return: true if either there is a fence we can wait for or + * enough free memory to satisfy the allocation directly. + * false otherwise. 
+ */ +static bool drm_suballoc_event(struct drm_suballoc_manager *sa_manager, + size_t size, size_t align) +{ + bool ret; + + spin_lock(&sa_manager->wq.lock); + ret = __drm_suballoc_event(sa_manager, size, align); + spin_unlock(&sa_manager->wq.lock); + return ret; +} + +static bool drm_suballoc_next_hole(struct drm_suballoc_manager *sa_manager, + struct dma_fence **fences, + unsigned int *tries) +{ + struct drm_suballoc *best_bo = NULL; + unsigned int i, best_idx; + size_t soffset, best, tmp; + + /* if hole points to the end of the buffer */ + if (sa_manager->hole->next == &sa_manager->olist) { + /* try again with its beginning */ + sa_manager->hole = &sa_manager->olist; + return true; + } + + soffset = drm_suballoc_hole_soffset(sa_manager); + /* to handle wrap around we add sa_manager->size */ + best = sa_manager->size * 2; + /* go over all fence list and try to find the closest sa + * of the current last + */ + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) { + struct drm_suballoc *sa; + + fences[i] = NULL; + + if (list_empty(&sa_manager->flist[i])) + continue; + + sa = list_first_entry(&sa_manager->flist[i], + struct drm_suballoc, flist); + + if (!dma_fence_is_signaled(sa->fence)) { + fences[i] = sa->fence; + continue; + } + + /* limit the number of tries each freelist gets */ + if (tries[i] > 2) + continue; + + tmp = sa->soffset; + if (tmp < soffset) { + /* wrap around, pretend it's after */ + tmp += sa_manager->size; + } + tmp -= soffset; + if (tmp < best) { + /* this sa bo is the closest one */ + best = tmp; + best_idx = i; + best_bo = sa; + } + } + + if (best_bo) { + ++tries[best_idx]; + sa_manager->hole = best_bo->olist.prev; + + /* + * We know that this one is signaled, + * so it's safe to remove it. + */ + drm_suballoc_remove_locked(best_bo); + return true; + } + return false; +} + +/** + * drm_suballoc_new() - Make a suballocation. + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to suballocate. + * @gfp: gfp flags used for memory allocation. Typically GFP_KERNEL but + * the argument is provided for suballocations from reclaim context or + * where the caller wants to avoid pipelining rather than wait for + * reclaim. + * @intr: Whether to perform waits interruptible. This should typically + * always be true, unless the caller needs to propagate a + * non-interruptible context from above layers. + * @align: Alignment. Must not exceed the default manager alignment. + * If @align is zero, then the manager alignment is used. + * + * Try to make a suballocation of size @size, which will be rounded + * up to the alignment specified in specified in drm_suballoc_manager_init(). + * + * Return: a new suballocated bo, or an ERR_PTR. 
+ */ +struct drm_suballoc * +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, + gfp_t gfp, bool intr, size_t align) +{ + struct dma_fence *fences[DRM_SUBALLOC_MAX_QUEUES]; + unsigned int tries[DRM_SUBALLOC_MAX_QUEUES]; + unsigned int count; + int i, r; + struct drm_suballoc *sa; + + if (WARN_ON_ONCE(align > sa_manager->align)) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(size > sa_manager->size || !size)) + return ERR_PTR(-EINVAL); + + if (!align) + align = sa_manager->align; + + sa = kmalloc(sizeof(*sa), gfp); + if (!sa) + return ERR_PTR(-ENOMEM); + sa->manager = sa_manager; + sa->fence = NULL; + INIT_LIST_HEAD(&sa->olist); + INIT_LIST_HEAD(&sa->flist); + + spin_lock(&sa_manager->wq.lock); + do { + for (i = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) + tries[i] = 0; + + do { + drm_suballoc_try_free(sa_manager); + + if (drm_suballoc_try_alloc(sa_manager, sa, + size, align)) { + spin_unlock(&sa_manager->wq.lock); + return sa; + } + + /* see if we can skip over some allocations */ + } while (drm_suballoc_next_hole(sa_manager, fences, tries)); + + for (i = 0, count = 0; i < DRM_SUBALLOC_MAX_QUEUES; ++i) + if (fences[i]) + fences[count++] = dma_fence_get(fences[i]); + + if (count) { + long t; + + spin_unlock(&sa_manager->wq.lock); + t = dma_fence_wait_any_timeout(fences, count, intr, + MAX_SCHEDULE_TIMEOUT, + NULL); + for (i = 0; i < count; ++i) + dma_fence_put(fences[i]); + + r = (t > 0) ? 0 : t; + spin_lock(&sa_manager->wq.lock); + } else if (intr) { + /* if we have nothing to wait for block */ + r = wait_event_interruptible_locked + (sa_manager->wq, + __drm_suballoc_event(sa_manager, size, align)); + } else { + spin_unlock(&sa_manager->wq.lock); + wait_event(sa_manager->wq, + drm_suballoc_event(sa_manager, size, align)); + r = 0; + spin_lock(&sa_manager->wq.lock); + } + } while (!r); + + spin_unlock(&sa_manager->wq.lock); + kfree(sa); + return ERR_PTR(r); +} +EXPORT_SYMBOL(drm_suballoc_new); + +/** + * drm_suballoc_free - Free a suballocation + * @suballoc: pointer to the suballocation + * @fence: fence that signals when suballocation is idle + * + * Free the suballocation. The suballocation can be re-used after @fence signals. 
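+ *
+ * Both call patterns, as a comment-only sketch ("sa" and "job_fence" are
+ * hypothetical names):
+ *
+ *	drm_suballoc_free(sa, job_fence);  re-use deferred until it signals
+ *	drm_suballoc_free(sa, NULL);       range is idle, re-usable at once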
+ */ +void drm_suballoc_free(struct drm_suballoc *suballoc, + struct dma_fence *fence) +{ + struct drm_suballoc_manager *sa_manager; + + if (!suballoc) + return; + + sa_manager = suballoc->manager; + + spin_lock(&sa_manager->wq.lock); + if (fence && !dma_fence_is_signaled(fence)) { + u32 idx; + + suballoc->fence = dma_fence_get(fence); + idx = fence->context & (DRM_SUBALLOC_MAX_QUEUES - 1); + list_add_tail(&suballoc->flist, &sa_manager->flist[idx]); + } else { + drm_suballoc_remove_locked(suballoc); + } + wake_up_all_locked(&sa_manager->wq); + spin_unlock(&sa_manager->wq.lock); +} +EXPORT_SYMBOL(drm_suballoc_free); + +#ifdef CONFIG_DEBUG_FS +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager, + struct drm_printer *p, + unsigned long long suballoc_base) +{ + struct drm_suballoc *i; + + spin_lock(&sa_manager->wq.lock); + list_for_each_entry(i, &sa_manager->olist, olist) { + unsigned long long soffset = i->soffset; + unsigned long long eoffset = i->eoffset; + + if (&i->olist == sa_manager->hole) + drm_puts(p, ">"); + else + drm_puts(p, " "); + + drm_printf(p, "[0x%010llx 0x%010llx] size %8lld", + suballoc_base + soffset, suballoc_base + eoffset, + eoffset - soffset); + + if (i->fence) + drm_printf(p, " protected by 0x%016llx on context %llu", + (unsigned long long)i->fence->seqno, + (unsigned long long)i->fence->context); + + drm_puts(p, "\n"); + } + spin_unlock(&sa_manager->wq.lock); +} +EXPORT_SYMBOL(drm_suballoc_dump_debug_info); +#endif +MODULE_AUTHOR("Multiple"); +MODULE_DESCRIPTION("Range suballocator helper"); +MODULE_LICENSE("Dual MIT/GPL"); +#endif /*HAVE_DRM_SUBALLOC_MANAGER_INIT*/ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_drm_vblank.c b/drivers/gpu/drm/amd/amdkcl/kcl_drm_vblank.c new file mode 100644 index 0000000000000..f8d4ab7de31e3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_drm_vblank.c @@ -0,0 +1,43 @@ +/* + * drm_irq.c IRQ and vblank support + * + * \author Rickard E. (Rik) Faith + * \author Gareth Hughes + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include + +/*copy from drivers/gpu/drm/drm_vblank.c */ +#ifndef HAVE_CRTC_DRM_VBLANK_CRTC +static struct drm_vblank_crtc * +drm_vblank_crtc(struct drm_device *dev, unsigned int pipe) +{ + return &dev->vblank[pipe]; +} + +struct drm_vblank_crtc * +drm_crtc_vblank_crtc(struct drm_crtc *crtc) +{ + return drm_vblank_crtc(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_vblank_crtc); +#endif + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_fbmem.h b/drivers/gpu/drm/amd/amdkcl/kcl_fbmem.h new file mode 100644 index 0000000000000..5275dfcb6b6ca --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_fbmem.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _AMDKCL_KCL_FBMEM_H_ +#define _AMDKCL_KCL_FBMEM_H_ + +#include +#include + +static inline +int _kcl_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, + const char *name) +{ +#ifdef HAVE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PP + return remove_conflicting_pci_framebuffers(pdev, name); +#else + /** + * v5.1-rc3-20-gb0e999c95581 fbdev: list all pci memory bars as conflicting apertures + * handle bar 0 directly. + * as remove_conflicting_pci_framebuffers() for bar 2/5 fails on rhel7.9 + int bar, err; + + for (bar = 0; bar < PCI_ROM_RESOURCE; bar++) { + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) + continue; + err = remove_conflicting_pci_framebuffers(pdev, bar, name); + if (err) + return err; + } + */ + pr_warn_once("remove conflicting pci framebuffers on bar 0\n"); + return remove_conflicting_pci_framebuffers(pdev, 0, name); +#endif +} +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_fence.c b/drivers/gpu/drm/amd/amdkcl/kcl_fence.c new file mode 100644 index 0000000000000..e79e331222d00 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_fence.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fence mechanism for dma-buf and to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include + +#define CREATE_TRACE_POINTS +#include "kcl_fence_trace.h" + +/* Copied from drivers/dma-buf/dma-fence.c */ +#if defined(AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT) || defined(AMDKCL_FENCE_WAIT_ANY_TIMEOUT) +static bool +dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, + uint32_t *idx) +{ + int i; + + for (i = 0; i < count; ++i) { + struct dma_fence *fence = fences[i]; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + if (idx) + *idx = i; + return true; + } + } + return false; +} +#endif + +struct default_wait_cb { + struct dma_fence_cb base; + struct task_struct *task; +}; + +#ifdef AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT +static void (*_kcl_fence_default_wait_cb)(struct dma_fence *fence, struct dma_fence_cb *cb); + +signed long +_kcl_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) +{ + struct default_wait_cb cb; + unsigned long flags; + signed long ret = timeout ? 
timeout : 1; + bool was_set; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return ret; + + spin_lock_irqsave(fence->lock, flags); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + + was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &fence->flags); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + goto out; + + if (!was_set && fence->ops->enable_signaling) { + /* + * Modifications [2017-03-29] (c) [2017] + * Advanced Micro Devices, Inc. + */ + trace_kcl_fence_enable_signal(fence); + + if (!fence->ops->enable_signaling(fence)) { + dma_fence_signal_locked(fence); + goto out; + } + } + + if (!timeout) { + ret = 0; + goto out; + } + + cb.base.func = _kcl_fence_default_wait_cb; + cb.task = current; + list_add(&cb.base.node, &fence->cb_list); + + while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { + if (intr) + __set_current_state(TASK_INTERRUPTIBLE); + else + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irqrestore(fence->lock, flags); + + ret = schedule_timeout(ret); + + spin_lock_irqsave(fence->lock, flags); + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + if (!list_empty(&cb.base.node)) + list_del(&cb.base.node); + __set_current_state(TASK_RUNNING); + +out: + spin_unlock_irqrestore(fence->lock, flags); + return ret; +} +EXPORT_SYMBOL(_kcl_fence_default_wait); +#endif + + +/* + * Modifications [2017-09-19] (c) [2017] + * Advanced Micro Devices, Inc. + */ +#ifdef AMDKCL_FENCE_WAIT_ANY_TIMEOUT +signed long +_kcl_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, + bool intr, signed long timeout, uint32_t *idx) +{ + struct default_wait_cb *cb; + signed long ret = timeout; + unsigned i; + + if (WARN_ON(!fences || !count || timeout < 0)) + return -EINVAL; + + if (timeout == 0) { + for (i = 0; i < count; ++i) + if (dma_fence_is_signaled(fences[i])) { + if (idx) + *idx = i; + return 1; + } + + return 0; + } + + cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); + if (cb == NULL) { + ret = -ENOMEM; + goto err_free_cb; + } + + for (i = 0; i < count; ++i) { + struct dma_fence *fence = fences[i]; + + + cb[i].task = current; + if (dma_fence_add_callback(fence, &cb[i].base, + _kcl_fence_default_wait_cb)) { + /* This fence is already signaled */ + if (idx) + *idx = i; + goto fence_rm_cb; + } + } + + while (ret > 0) { + if (intr) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); + + if (dma_fence_test_signaled_any(fences, count, idx)) + break; + + ret = schedule_timeout(ret); + + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + __set_current_state(TASK_RUNNING); + +fence_rm_cb: + while (i-- > 0) + dma_fence_remove_callback(fences[i], &cb[i].base); + +err_free_cb: + kfree(cb); + + return ret; +} +EXPORT_SYMBOL(_kcl_fence_wait_any_timeout); +#endif + +#ifdef AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT +signed long +_kcl_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) +{ + signed long ret; + + if (WARN_ON(timeout < 0)) + return -EINVAL; + + /* + * Modifications [2017-03-29] (c) [2017] + * Advanced Micro Devices, Inc. 
+ */
+	trace_kcl_fence_wait_start(fence);
+	if (fence->ops->wait)
+		ret = fence->ops->wait(fence, intr, timeout);
+	else
+		ret = _kcl_fence_default_wait(fence, intr, timeout);
+	trace_kcl_fence_wait_end(fence);
+	return ret;
+}
+EXPORT_SYMBOL(_kcl_fence_wait_timeout);
+#endif
+
+#ifdef AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING
+bool _kcl_fence_enable_signaling(struct dma_fence *f)
+{
+	return true;
+}
+EXPORT_SYMBOL(_kcl_fence_enable_signaling);
+#endif
+/*
+ * Modifications [2016-12-23] (c) [2016]
+ * Advanced Micro Devices, Inc.
+ */
+void amdkcl_fence_init(void)
+{
+#ifdef AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT
+	_kcl_fence_default_wait_cb = amdkcl_fp_setup("dma_fence_default_wait_cb", NULL);
+#endif
+}
+
+#if !defined(HAVE_DMA_FENCE_DESCRIBE)
+/**
+ * dma_fence_describe - Dump fence description into seq_file
+ * @fence: the fence to describe
+ * @seq: the seq_file to put the textual description into
+ *
+ * Dump a textual description of the fence and its state into the seq_file.
+ */
+void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
+{
+	seq_printf(seq, "%s %s seq %llu %ssignalled\n",
+		   fence->ops->get_driver_name(fence),
+		   fence->ops->get_timeline_name(fence), fence->seqno,
+		   dma_fence_is_signaled(fence) ? "" : "un");
+}
+EXPORT_SYMBOL(dma_fence_describe);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_fence_trace.h b/drivers/gpu/drm/amd/amdkcl/kcl_fence_trace.h
new file mode 100644
index 0000000000000..5a74e165c087f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_fence_trace.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copied from include/trace/events/dma_fence.h */
+#if !defined(_TRACE_KCL_FENCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KCL_FENCE_H
+
+#include
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kcl_fence
+#define TRACE_INCLUDE_FILE kcl_fence_trace
+
+struct dma_fence;
+
+DECLARE_EVENT_CLASS(kcl_fence,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence),
+
+	TP_STRUCT__entry(
+		__string(driver, fence->ops->get_driver_name(fence))
+		__string(timeline, fence->ops->get_timeline_name(fence))
+		__field(unsigned int, context)
+		__field(unsigned int, seqno)
+	),
+
+	TP_fast_assign(
+		__amdkcl_assign_str(driver, fence->ops->get_driver_name(fence));
+		__amdkcl_assign_str(timeline, fence->ops->get_timeline_name(fence));
+		__entry->context = fence->context;
+		__entry->seqno = fence->seqno;
+	),
+
+	TP_printk("driver=%s timeline=%s context=%u seqno=%u",
+		  __get_str(driver), __get_str(timeline), __entry->context,
+		  __entry->seqno)
+);
+
+DEFINE_EVENT(kcl_fence, kcl_fence_init,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(kcl_fence, kcl_fence_enable_signal,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(kcl_fence, kcl_fence_wait_start,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+DEFINE_EVENT(kcl_fence, kcl_fence_wait_end,
+
+	TP_PROTO(struct dma_fence *fence),
+
+	TP_ARGS(fence)
+);
+
+#endif /* _TRACE_KCL_FENCE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
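+/*
+ * The include directly below is the standard trace footer: together with
+ * TRACE_INCLUDE_PATH "." and TRACE_INCLUDE_FILE above, it lets the
+ * tracepoint machinery re-read this header and expand the event classes
+ * defined here into real code.
+ */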
+#include diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_fs_read_write.c b/drivers/gpu/drm/amd/amdkcl/kcl_fs_read_write.c new file mode 100644 index 0000000000000..e45c10eabc006 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_fs_read_write.c @@ -0,0 +1,27 @@ +/* + * linux/fs/read_write.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#include +#include + +/* Copied from v4.13-rc7-6-ge13ec939e96b:fs/read_write.c */ +#ifndef HAVE_KERNEL_WRITE_PPOS +ssize_t _kcl_kernel_write(struct file *file, const void *buf, size_t count, + loff_t *pos) +{ + mm_segment_t old_fs; + ssize_t res; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = vfs_write(file, (__force const char __user *)buf, count, pos); + set_fs(old_fs); + + return res; +} +EXPORT_SYMBOL(_kcl_kernel_write); +#endif + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_io.c b/drivers/gpu/drm/amd/amdkcl/kcl_io.c new file mode 100644 index 0000000000000..c1f2307557352 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_io.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Page Attribute Table (PAT) support: handle memory caching attributes in page tables. + * + * Authors: Venkatesh Pallipadi + * Suresh B Siddha + * + * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen. + * + * Basic principles: + * + * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and + * the kernel to set one of a handful of 'caching type' attributes for physical + * memory ranges: uncached, write-combining, write-through, write-protected, + * and the most commonly used and default attribute: write-back caching. + * + * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is + * a hardware interface to enumerate a limited number of physical memory ranges + * and set their caching attributes explicitly, programmed into the CPU via MSRs. + * Even modern CPUs have MTRRs enabled - but these are typically not touched + * by the kernel or by user-space (such as the X server), we rely on PAT for any + * additional cache attribute logic. + * + * PAT doesn't work via explicit memory ranges, but uses page table entries to add + * cache attribute information to the mapped memory range: there's 3 bits used, + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the + * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT). + * + * ( There's a metric ton of finer details, such as compatibility with CPU quirks + * that only support 4 types of PAT entries, and interaction with MTRRs, see + * below for details. 
) + */ +#include +#include + +/* Copied from arch/x86/mm/pat.c and modified for KCL */ +#if !defined(HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC) && \ + defined(CONFIG_X86) +#include + +static int (*_kcl_io_reserve_memtype)(resource_size_t start, resource_size_t end, + enum page_cache_mode *type); +static void (*_kcl_io_free_memtype)(resource_size_t start, resource_size_t end); + +int _kcl_arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ +#ifdef _PAGE_CACHE_WC + unsigned long type = _PAGE_CACHE_WC; +#else + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; +#endif + + return _kcl_io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(_kcl_arch_io_reserve_memtype_wc); + +void _kcl_arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + _kcl_io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(_kcl_arch_io_free_memtype_wc); + +void amdkcl_io_init(void) +{ + _kcl_io_reserve_memtype = amdkcl_fp_setup("io_reserve_memtype", NULL); + _kcl_io_free_memtype = amdkcl_fp_setup("io_free_memtype", NULL); +} +#else +void amdkcl_io_init(void) +{ + +} +#endif /* HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_ioctl.c b/drivers/gpu/drm/amd/amdkcl/kcl_ioctl.c new file mode 100644 index 0000000000000..aef47eda7f4ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_ioctl.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/fs/ioctl.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#include +#include + +/* Copied from v5.4-rc2-1-g2952db0fd51b fs/ioctl.c */ +#ifndef HAVE_COMPAT_PTR_IOCTL +#ifdef CONFIG_COMPAT +/** + * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation + * + * This is not normally called as a function, but instead set in struct + * file_operations as + * + * .compat_ioctl = compat_ptr_ioctl, + * + * On most architectures, the compat_ptr_ioctl() just passes all arguments + * to the corresponding ->ioctl handler. The exception is arch/s390, where + * compat_ptr() clears the top bit of a 32-bit pointer value, so user space + * pointers to the second 2GB alias the first 2GB, as is the case for + * native 32-bit s390 user space. + * + * The compat_ptr_ioctl() function must therefore be used only with ioctl + * functions that either ignore the argument or pass a pointer to a + * compatible data type. + * + * If any ioctl command handled by fops->unlocked_ioctl passes a plain + * integer instead of a pointer, or any of the passed data types + * is incompatible between 32-bit and 64-bit architectures, a proper + * handler is required instead of compat_ptr_ioctl. + */ +long _kcl_compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + if (!file->f_op->unlocked_ioctl) + return -ENOIOCTLCMD; + + return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +EXPORT_SYMBOL(_kcl_compat_ptr_ioctl); +#endif /* CONFIG_COMPAT */ +#endif /* HAVE_COMPAT_PTR_IOCTL */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_irqdesc.c b/drivers/gpu/drm/amd/amdkcl/kcl_irqdesc.c new file mode 100644 index 0000000000000..e53a60dbb71f0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_irqdesc.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the interrupt descriptor management code. 
Detailed
+ * information is available in Documentation/core-api/genericirq.rst
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/**
+ * kcl_generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+ *				   to a domain
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The HW irq number to convert to a logical one
+ *
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ *
+ * This function must be called from an IRQ context with irq regs
+ * initialized.
+ */
+#ifndef HAVE_GENERIC_HANDLE_DOMAIN_IRQ
+int kcl_generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
+{
+	int irq;
+
+	irq = irq_find_mapping(domain, hwirq);
+
+	return generic_handle_irq(irq);
+}
+EXPORT_SYMBOL_GPL(kcl_generic_handle_domain_irq);
+#endif
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_kernel_params.c b/drivers/gpu/drm/amd/amdkcl/kcl_kernel_params.c
new file mode 100644
index 0000000000000..10fe1c5d9d9c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_kernel_params.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Helpers for initial module or kernel cmdline parsing
+   Copyright (C) 2001 Rusty Russell.
+
+*/
+#include
+
+// Copied from kernel/params.c
+#define STANDARD_PARAM_DEF(name, type, format, strtolfn)		\
+	int param_set_##name(const char *val, const struct kernel_param *kp) \
+	{								\
+		return strtolfn(val, 0, (type *)kp->arg);		\
+	}								\
+	int param_get_##name(char *buffer, const struct kernel_param *kp) \
+	{								\
+		return scnprintf(buffer, PAGE_SIZE, format "\n",	\
+				*((type *)kp->arg));			\
+	}								\
+	const struct kernel_param_ops param_ops_##name = {		\
+		.set = param_set_##name,				\
+		.get = param_get_##name,				\
+	};								\
+	EXPORT_SYMBOL(param_set_##name);				\
+	EXPORT_SYMBOL(param_get_##name);				\
+	EXPORT_SYMBOL(param_ops_##name)
+
+#ifdef _kcl_param_check_hexint
+STANDARD_PARAM_DEF(hexint, unsigned int, "%#08x", kstrtouint);
+#endif
+
+#ifdef _kcl_param_check_ullong
+STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull);
+#endif
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_kthread.c b/drivers/gpu/drm/amd/amdkcl/kcl_kthread.c
new file mode 100644
index 0000000000000..df0b9d1c52b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_kthread.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Kernel thread helper functions.
+ *   Copyright (C) 2004 IBM Corporation, Rusty Russell.
+ *   Copyright (C) 2009 Red Hat, Inc.
+ *
+ * Creation is done via kthreadd, so that we get a clean environment
+ * even if we're invoked from userspace (think modprobe, hotplug cpu,
+ * etc.).
+ */
+
+/*
+ * FIXME: implement the API below when the kernel version is < 4.2
+ */
+#include
+#include
+#include
+
+#if !defined(HAVE___KTHREAD_SHOULD_PARK)
+bool __kcl_kthread_should_park(struct task_struct *k)
+{
+	pr_warn_once("This kernel version does not support API: __kthread_should_park!\n");
+	return false;
+}
+EXPORT_SYMBOL(__kcl_kthread_should_park);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_mce_amd.c b/drivers/gpu/drm/amd/amdkcl/kcl_mce_amd.c
new file mode 100644
index 0000000000000..e2cd6191d7171
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_mce_amd.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * (c) 2005-2016 Advanced Micro Devices, Inc.
+ *
+ * Written by Jacob Shin - AMD, Inc.
+ * Maintained by: Borislav Petkov
+ *
+ * All MC4_MISCi registers are shared between cores on a node.
+ */ +#ifdef CONFIG_X86_MCE_AMD +#include + +#if !defined(HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS) && !defined(HAVE_SMCA_GET_BANK_TYPE_WITH_ONE_ARGUMENT) +#if defined(HAVE_STRUCT_SMCA_BANK) +enum smca_bank_types smca_get_bank_type(unsigned int bank) +{ + struct smca_bank *b; + + if (bank >= MAX_NR_BANKS) + return N_SMCA_BANK_TYPES; + + b = &smca_banks[bank]; + if (!b->hwid) + return N_SMCA_BANK_TYPES; + + return b->hwid->bank_type; +} +#else +int smca_get_bank_type(unsigned int bank) +{ + pr_warn_once("smca_get_bank_type is not supported\n"); + return 0; +} +#endif +EXPORT_SYMBOL_GPL(smca_get_bank_type); +#endif + +#endif /* CONFIG_X86_MCE_AMD */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_memory.c b/drivers/gpu/drm/amd/amdkcl/kcl_memory.c new file mode 100644 index 0000000000000..f5d947730e628 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_memory.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellstrom
+ */
+#include
+
+/* Copied from drivers/gpu/drm/ttm/ttm_bo_vm.c and modified for KCL */
+#ifndef HAVE_VMF_INSERT_MIXED_PROT
+vm_fault_t _kcl_vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
+				      pfn_t pfn, pgprot_t pgprot)
+{
+	struct vm_area_struct cvma = *vma;
+
+	cvma.vm_page_prot = pgprot;
+
+	return vmf_insert_mixed(&cvma, addr, pfn);
+}
+EXPORT_SYMBOL(_kcl_vmf_insert_mixed_prot);
+#endif
+
+#ifndef HAVE_VMF_INSERT_PFN_PROT
+#ifndef HAVE_VM_INSERT_PFN_PROT
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+		       unsigned long pfn, pgprot_t pgprot)
+{
+	struct vm_area_struct cvma = *vma;
+
+	cvma.vm_page_prot = pgprot;
+
+	return vm_insert_pfn(&cvma, addr, pfn);
+}
+#endif
+
+vm_fault_t _kcl_vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+				    unsigned long pfn, pgprot_t pgprot)
+{
+	int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
+
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (err < 0 && err != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL(_kcl_vmf_insert_pfn_prot);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_mm.c b/drivers/gpu/drm/amd/amdkcl/kcl_mm.c
new file mode 100644
index 0000000000000..d2836f42adf31
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_mm.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/kernel/fork.c
+ *
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ */
+#include
+#include
+#include
+
+#ifndef HAVE_MMPUT_ASYNC
+void (*_kcl_mmput_async)(struct mm_struct *mm);
+EXPORT_SYMBOL(_kcl_mmput_async);
+
+void __kcl_mmput_async(struct mm_struct *mm)
+{
+	pr_warn_once("This kernel version does not support API: mmput_async!\n");
+}
+#endif
+
+#ifndef HAVE_ZONE_DEVICE_PAGE_INIT
+/* Copied from v6.0-rc3-597-g0dc45ca1ce18 mm/memremap.c and modified for KCL usage */
+void zone_device_page_init(struct page *page)
+{
+/* v5.17-rc4-75-g27674ef6c73f mm: remove the extra ZONE_DEVICE struct page refcount */
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
+	get_page(page);
+#endif
+	lock_page(page);
+}
+EXPORT_SYMBOL_GPL(zone_device_page_init);
+#endif
+
+#ifndef HAVE_KMALLOC_SIZE_ROUNDUP
+#ifndef CONFIG_SLOB
+extern struct kmem_cache *(*_kcl_kmalloc_slab)(size_t size, gfp_t flags);
+#endif
+#endif /* HAVE_KMALLOC_SIZE_ROUNDUP */
+
+#ifndef HAVE_KVREALLOC
+void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+{
+	void *newp;
+
+	if (oldsize >= newsize)
+		return (void *)p;
+	newp = kvmalloc(newsize, flags);
+	if (!newp)
+		return NULL;
+	memcpy(newp, p, oldsize);
+	kvfree(p);
+	return newp;
+}
+EXPORT_SYMBOL(kvrealloc);
+#endif
+
+void amdkcl_mm_init(void)
+{
+#ifndef HAVE_MMPUT_ASYNC
+	_kcl_mmput_async = amdkcl_fp_setup("mmput_async", __kcl_mmput_async);
+#endif
+
+#ifndef HAVE_KMALLOC_SIZE_ROUNDUP
+#ifndef CONFIG_SLOB
+	_kcl_kmalloc_slab = amdkcl_fp_setup("kmalloc_slab", NULL);
+#endif
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_mm_slab.c b/drivers/gpu/drm/amd/amdkcl/kcl_mm_slab.c
new file mode 100644
index 0000000000000..3de9dfff5d0df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_mm_slab.c
@@ -0,0 +1,44 @@
+#include
+#include
+#include
+
+#if !defined(HAVE_KMALLOC_SIZE_ROUNDUP)
+#ifdef CONFIG_SLOB
+/* Copied from mm/slob.c */
+size_t kmalloc_size_roundup(size_t size)
+{
+	/* Short-circuit the 0 size case. */
+	if (unlikely(size == 0))
+		return 0;
+	/* Short-circuit saturated "too-large" case.
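+	 * (ALIGN() below would wrap around to 0 for SIZE_MAX, so the
+	 * saturated value is passed through unchanged.)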
*/ + if (unlikely(size == SIZE_MAX)) + return SIZE_MAX; + + return ALIGN(size, ARCH_KMALLOC_MINALIGN); +} + +EXPORT_SYMBOL(kmalloc_size_roundup); +#else +/* copy from mm/slab_common.c and modified for KCL usage. */ +struct kmem_cache *(*_kcl_kmalloc_slab)(size_t size, gfp_t flags); +size_t kmalloc_size_roundup(size_t size) +{ + struct kmem_cache *c; + + /* Short-circuit the 0 size case. */ + if (unlikely(size == 0)) + return 0; + /* Short-circuit saturated "too-large" case. */ + if (unlikely(size == SIZE_MAX)) + return SIZE_MAX; + /* Above the smaller buckets, size is a multiple of page size. */ + if (size > KMALLOC_MAX_CACHE_SIZE) + return PAGE_SIZE << get_order(size); + + /* The flags don't matter since size_index is common to all. */ + c = _kcl_kmalloc_slab(size, GFP_KERNEL); + return c ? kmem_cache_size(c) : 0; +} +EXPORT_SYMBOL(kmalloc_size_roundup); +#endif +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_mn.c b/drivers/gpu/drm/amd/amdkcl/kcl_mn.c new file mode 100644 index 0000000000000..20a0c2c5a9280 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_mn.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +/* Copied from v3.16-6588-gb972216e27d1 mm/mmu_notifier.c */ +#if !defined(HAVE_MMU_NOTIFIER_CALL_SRCU) && \ + !defined(HAVE_MMU_NOTIFIER_PUT) +/* + * Modifications [2017-03-14] (c) [2017] + */ + +/* + * This function allows mmu_notifier::release callback to delay a call to + * a function that will free appropriate resources. The function must be + * quick and must not block. + */ +void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)) +{ + /* changed from call_srcu to call_rcu */ + call_rcu(rcu, func); +} +EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); + +void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier_mm->lock); + /* + * Can not use list_del_rcu() since __mmu_notifier_release + * can delete it before we hold the lock. + */ + hlist_del_init_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier_mm->lock); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_numa.c b/drivers/gpu/drm/amd/amdkcl/kcl_numa.c new file mode 100644 index 0000000000000..92605529089d1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_numa.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +#ifndef HAVE_PXM_TO_NODE +int (*_kcl_pxm_to_node)(int pxm); +EXPORT_SYMBOL(_kcl_pxm_to_node); + +/* Copied from include/acpi/acpi_numa.h */ +static int __kcl_pxm_to_node_stub(int pxm) +{ + return 0; +} +#endif + +void amdkcl_numa_init(void) +{ +#ifndef HAVE_PXM_TO_NODE + _kcl_pxm_to_node = amdkcl_fp_setup("pxm_to_node", __kcl_pxm_to_node_stub); +#endif +} diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_page_alloc.c b/drivers/gpu/drm/amd/amdkcl/kcl_page_alloc.c new file mode 100644 index 0000000000000..4b6735959d945 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_page_alloc.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/mm/page_alloc.c + * + * Manages the free list, the system allocates free pages here. 
+ * Note that kmalloc() lives in slab.c + * + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * Swap reorganised 29.12.95, Stephen Tweedie + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999 + * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 + * Zone balancing, Kanoj Sarcar, SGI, Jan 2000 + * Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002 + * (lots of bits borrowed from Ingo Molnar & Andrew Morton) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +#include +#include +#include +//#include "internal.h" +//#include "shuffle.h" +//#include "page_reporting.h" + +/* Copied from mm/page_allo.c */ +#ifndef HAVE_FS_RECLAIM_ACQUIRE +#ifdef CONFIG_LOCKDEP +static struct lockdep_map __fs_reclaim_map = + STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); + +static bool __need_reclaim(gfp_t gfp_mask) +{ + /* no reclaim without waiting on it */ + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) + return false; + + /* this guy won't enter reclaim */ + if (current->flags & PF_MEMALLOC) + return false; + + if (gfp_mask & __GFP_NOLOCKDEP) + return false; + + return true; +} + +void __fs_reclaim_acquire(void) +{ + lock_map_acquire(&__fs_reclaim_map); +} + +void __fs_reclaim_release(void) +{ + lock_map_release(&__fs_reclaim_map); +} + +void _kcl_fs_reclaim_acquire(gfp_t gfp_mask) +{ + gfp_mask = current_gfp_context(gfp_mask); + + if (__need_reclaim(gfp_mask)) { + if (gfp_mask & __GFP_FS) + __fs_reclaim_acquire(); + +#ifdef CONFIG_MMU_NOTIFIER + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); + lock_map_release(&__mmu_notifier_invalidate_range_start_map); +#endif + + } +} +EXPORT_SYMBOL_GPL(_kcl_fs_reclaim_acquire); + +void _kcl_fs_reclaim_release(gfp_t gfp_mask) +{ + gfp_mask = current_gfp_context(gfp_mask); + + if (__need_reclaim(gfp_mask)) { + if (gfp_mask & __GFP_FS) + __fs_reclaim_release(); + } +} +EXPORT_SYMBOL_GPL(_kcl_fs_reclaim_release); +#endif +#endif /* HAVE_FS_RECLAIM_ACQUIRE */ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_pci.c b/drivers/gpu/drm/amd/amdkcl/kcl_pci.c new file mode 100644 index 0000000000000..742ba33ab1884 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_pci.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI Bus Services, see include/linux/pci.h for further explanation. + * + * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter, + * David Mosberger-Tang + * + * Copyright 1997 -- 2000 Martin Mares + * For codes copied from drivers/pci/pci.c + * + * (C) Copyright 2002-2004 Greg Kroah-Hartman + * (C) Copyright 2002-2004 IBM Corp. + * (C) Copyright 2003 Matthew Wilcox + * (C) Copyright 2003 Hewlett-Packard + * (C) Copyright 2004 Jon Smirl + * (C) Copyright 2004 Silicon Graphics, Inc. 
Jesse Barnes + * For codes copied from drivers/pci/pci-sysfs.c + */ + +#include +#include +#include + +enum pci_bus_speed (*_kcl_pcie_get_speed_cap)(struct pci_dev *dev); +EXPORT_SYMBOL(_kcl_pcie_get_speed_cap); + +enum pcie_link_width (*_kcl_pcie_get_width_cap)(struct pci_dev *dev); +EXPORT_SYMBOL(_kcl_pcie_get_width_cap); + +#if !defined(HAVE_PCI_CONFIGURE_EXTENDED_TAGS) +void _kcl_pci_configure_extended_tags(struct pci_dev *dev) +{ + u32 cap; + u16 ctl; + int ret; + + if (!pci_is_pcie(dev)) + return; + + ret = pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); + if (ret) + return; + + if (!(cap & PCI_EXP_DEVCAP_EXT_TAG)) + return; + + ret = pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl); + if (ret) + return; + + if (!(ctl & PCI_EXP_DEVCTL_EXT_TAG)) { + pcie_capability_set_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_EXT_TAG); + } +} +EXPORT_SYMBOL(_kcl_pci_configure_extended_tags); +#endif + +#ifndef HAVE_PCI_PR3_PRESENT +#ifdef CONFIG_ACPI +bool _kcl_pci_pr3_present(struct pci_dev *pdev) +{ + struct acpi_device *adev; + + if (acpi_disabled) + return false; + + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) + return false; + + return adev->power.flags.power_resources && + acpi_has_method(adev->handle, "_PR3"); +} +EXPORT_SYMBOL_GPL(_kcl_pci_pr3_present); +#endif +#endif /* HAVE_PCI_PR3_PRESENT */ + +#ifdef AMDKCL_CREATE_MEASURE_FILE +/* Copied from drivers/pci/pci-sysfs.c */ +static ssize_t max_link_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%s\n", PCIE_SPEED2STR(kcl_pcie_get_speed_cap(pdev))); +} +static DEVICE_ATTR_RO(max_link_speed); + +static ssize_t max_link_width_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return sprintf(buf, "%u\n", kcl_pcie_get_width_cap(pdev)); +} +static DEVICE_ATTR_RO(max_link_width); + +static ssize_t current_link_speed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + u16 linkstat; + int err; + const char *speed; + + err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat); + if (err) + return -EINVAL; + + switch (linkstat & PCI_EXP_LNKSTA_CLS) { + case PCI_EXP_LNKSTA_CLS_16_0GB: + speed = "16 GT/s"; + break; + case PCI_EXP_LNKSTA_CLS_8_0GB: + speed = "8 GT/s"; + break; + case PCI_EXP_LNKSTA_CLS_5_0GB: + speed = "5 GT/s"; + break; + case PCI_EXP_LNKSTA_CLS_2_5GB: + speed = "2.5 GT/s"; + break; + default: + speed = "Unknown speed"; + } + + return sprintf(buf, "%s\n", speed); +} +static DEVICE_ATTR_RO(current_link_speed); + +static ssize_t current_link_width_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pci_dev = to_pci_dev(dev); + u16 linkstat; + int err; + + err = pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &linkstat); + if (err) + return -EINVAL; + + return sprintf(buf, "%u\n", + (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT); +} +static DEVICE_ATTR_RO(current_link_width); + +static struct attribute *pcie_dev_attrs[] = { + &dev_attr_current_link_speed.attr, + &dev_attr_current_link_width.attr, + &dev_attr_max_link_width.attr, + &dev_attr_max_link_speed.attr, + NULL, +}; + +int _kcl_pci_create_measure_file(struct pci_dev *pdev) +{ + int ret = 0; + + ret = device_create_file(&pdev->dev, &dev_attr_current_link_speed); + if (ret) { + dev_err(&pdev->dev, + "Failed to create current_link_speed sysfs files: %d\n", ret); + return ret; + } + 
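+	/* The three remaining link attributes follow the same create-or-fail pattern. */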
+ ret = device_create_file(&pdev->dev, &dev_attr_current_link_width); + if (ret) { + dev_err(&pdev->dev, + "Failed to create current_link_width sysfs files: %d\n", ret); + return ret; + } + + ret = device_create_file(&pdev->dev, &dev_attr_max_link_width); + if (ret) { + dev_err(&pdev->dev, + "Failed to create max_link_width sysfs files: %d\n", ret); + return ret; + } + + ret = device_create_file(&pdev->dev, &dev_attr_max_link_speed); + if (ret) { + dev_err(&pdev->dev, + "Failed to create max_link_speed sysfs files: %d\n", ret); + return ret; + } + + return ret; +} +EXPORT_SYMBOL(_kcl_pci_create_measure_file); + +void _kcl_pci_remove_measure_file(struct pci_dev *pdev) +{ + device_remove_file(&pdev->dev, &dev_attr_current_link_speed); + device_remove_file(&pdev->dev, &dev_attr_current_link_width); + device_remove_file(&pdev->dev, &dev_attr_max_link_width); + device_remove_file(&pdev->dev, &dev_attr_max_link_speed); +} +EXPORT_SYMBOL(_kcl_pci_remove_measure_file); +#endif /* AMDKCL_CREATE_MEASURE_FILE */ + +#ifdef AMDKCL_ENABLE_RESIZE_FB_BAR +/* Copied from drivers/pci/pci.c */ +#ifndef HAVE_PCI_REBAR_BYTES_TO_SIZE +static int _kcl_pci_rebar_find_pos(struct pci_dev *pdev, int bar) +{ + unsigned int pos, nbars, i; + u32 ctrl; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR); + if (!pos) + return -ENOTSUPP; + + pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl); + nbars = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> + PCI_REBAR_CTRL_NBAR_SHIFT; + + for (i = 0; i < nbars; i++, pos += 8) { + int bar_idx; + + pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl); + bar_idx = ctrl & PCI_REBAR_CTRL_BAR_IDX; + if (bar_idx == bar) + return pos; + } + + return -ENOENT; +} + +u32 _kcl_pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar) +{ + int pos; + u32 cap; + + pos = _kcl_pci_rebar_find_pos(pdev, bar); + if (pos < 0) + return 0; + + pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap); + cap &= PCI_REBAR_CAP_SIZES; + + /* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */ + if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f && + bar == 0 && cap == 0x7000) + cap = 0x3f000; + + return cap >> 4; +} +EXPORT_SYMBOL(_kcl_pci_rebar_get_possible_sizes); +#endif /* HAVE_PCI_REBAR_BYTES_TO_SIZE */ +#endif /* AMDKCL_ENABLE_RESIZE_FB_BAR */ + +/* Copied from drivers/pci/pci.c */ +#ifndef HAVE_PCI_GET_BASE_CLASS +static inline const struct pci_device_id * +pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) +{ + if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && + (id->device == PCI_ANY_ID || id->device == dev->device) && + (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && + (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && + !((id->class ^ dev->class) & id->class_mask)) + return id; + return NULL; +} + +static int match_pci_dev_by_id(struct device *dev, const void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + const struct pci_device_id *id = data; + + if (pci_match_one_device(id, pdev)) + return 1; + return 0; +} + +static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id, + struct pci_dev *from) +{ + struct device *dev; + struct device *dev_start = NULL; + struct pci_dev *pdev = NULL; + + if (from) + dev_start = &from->dev; + dev = bus_find_device(&pci_bus_type, dev_start, (void *)id, + match_pci_dev_by_id); + if (dev) + pdev = to_pci_dev(dev); + pci_dev_put(from); + return pdev; +} + +struct pci_dev *pci_get_base_class(unsigned int class, struct 
pci_dev *from) +{ + struct pci_device_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .class_mask = 0xFF0000, + .class = class << 16, + }; + + return pci_get_dev_by_id(&id, from); +} +EXPORT_SYMBOL(pci_get_base_class); +#endif /*HAVE_PCI_GET_BASE_CLASS*/ diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_seq_file.c b/drivers/gpu/drm/amd/amdkcl/kcl_seq_file.c new file mode 100644 index 0000000000000..725ca1cafbfc8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_seq_file.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/fs/seq_file.c + * + * helper functions for making synthetic files from sequences of records. + * initial implementation -- AV, Oct 2001. + */ +#include + +/* Copied from fs/seq_file.c */ +#ifndef HAVE_SEQ_HEX_DUMP +static void seq_set_overflow(struct seq_file *m) +{ + m->count = m->size; +} + +/* A complete analogue of print_hex_dump() */ +void _kcl_seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, const void *buf, size_t len, + bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + int ret; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + seq_printf(m, "%s%p: ", prefix_str, ptr + i); + break; + case DUMP_PREFIX_OFFSET: + seq_printf(m, "%s%.8x: ", prefix_str, i); + break; + default: + seq_printf(m, "%s", prefix_str); + break; + } + + ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + m->buf + m->count, m->size - m->count, + ascii); + if (ret >= m->size - m->count) { + seq_set_overflow(m); + } else { + m->count += ret; + seq_putc(m, '\n'); + } + } +} +EXPORT_SYMBOL(_kcl_seq_hex_dump); +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_suspend.c b/drivers/gpu/drm/amd/amdkcl/kcl_suspend.c new file mode 100644 index 0000000000000..c7f1086ebabd3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_suspend.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kernel/power/main.c - PM subsystem core functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + */ +#include +#include + +#ifndef HAVE_KSYS_SYNC_HELPER +/* Copied from kernel/power/main.c */ +#ifdef CONFIG_PM_SLEEP +long (*_kcl_ksys_sync)(void); + +void _kcl_ksys_sync_helper(void) +{ + pr_info("Syncing filesystems ... 
"); + _kcl_ksys_sync(); + pr_cont("done.\n"); +} +EXPORT_SYMBOL(_kcl_ksys_sync_helper); + +static bool _kcl_sys_sync_stub(void) +{ + pr_warn_once("kernel symbol [k]sys_sync not found!\n"); + return false; +} +#endif /* CONFIG_PM_SLEEP */ +#endif /* HAVE_KSYS_SYNC_HELPER */ + +void amdkcl_suspend_init(void) +{ +#ifndef HAVE_KSYS_SYNC_HELPER +#ifdef CONFIG_PM_SLEEP + _kcl_ksys_sync = amdkcl_fp_setup("ksys_sync", _kcl_sys_sync_stub); + if (_kcl_ksys_sync != _kcl_sys_sync_stub) { + return; + } + + _kcl_ksys_sync = amdkcl_fp_setup("sys_sync", _kcl_sys_sync_stub); + if (_kcl_ksys_sync != _kcl_sys_sync_stub) { + return; + } + + pr_err_once("Error: fail to get symbol [k]sys_sync!\n"); + BUG(); +#endif /* CONFIG_PM_SLEEP */ +#endif /* HAVE_KSYS_SYNC_HELPER */ +} + diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_sysfs_emit.c b/drivers/gpu/drm/amd/amdkcl/kcl_sysfs_emit.c new file mode 100644 index 0000000000000..0b23918cc8486 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_sysfs_emit.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/sysfs/file.c - sysfs regular (text) file implementation + * + * Copyright (c) 2001-3 Patrick Mochel + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007 Tejun Heo + * + * Please see Documentation/filesystems/sysfs.rst for more information. + */ +#include +#include + +/* Copied from fs/sysfs/file.c */ +#ifndef HAVE_SYSFS_EMIT +int sysfs_emit(char *buf, const char *fmt, ...) +{ + va_list args; + int len; + + if (WARN(!buf || offset_in_page(buf), + "invalid sysfs_emit: buf:%p\n", buf)) + return 0; + + va_start(args, fmt); + len = vscnprintf(buf, PAGE_SIZE, fmt, args); + va_end(args); + + return len; +} +EXPORT_SYMBOL_GPL(sysfs_emit); + +/** + * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer. + * @buf: start of PAGE_SIZE buffer. + * @at: offset in @buf to start write in bytes + * @at must be >= 0 && < PAGE_SIZE + * @fmt: format + * @...: optional arguments to @fmt + * + * + * Returns number of characters written starting at &@buf[@at]. + */ +int sysfs_emit_at(char *buf, int at, const char *fmt, ...) +{ + va_list args; + int len; + + if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE, + "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at)) + return 0; + + va_start(args, fmt); + len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args); + va_end(args); + + return len; +} +EXPORT_SYMBOL_GPL(sysfs_emit_at); + +#endif diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_time.c b/drivers/gpu/drm/amd/amdkcl/kcl_time.c new file mode 100644 index 0000000000000..a6394747da818 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_time.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This file contains the interface functions for the various time related + * system calls: time, stime, gettimeofday, settimeofday, adjtime + * + * Modification history: + * + * 1993-09-02 Philip Gladstone + * Created file with time related functions from sched/core.c and adjtimex() + * 1993-10-08 Torsten Duwe + * adjtime interface update and CMOS clock write code + * 1995-08-13 Torsten Duwe + * kernel PLL updated to 1994-12-13 specs (rfc-1589) + * 1999-01-16 Ulrich Windl + * Introduced error checking for many cases in adjtimex(). 
+ *	Updated NTP code according to technical memorandum Jan '96
+ *	"A Kernel Model for Precision Timekeeping" by Dave Mills
+ *	Allow time_constant larger than MAXTC(6) for NTP v4 (MAXTC == 10)
+ *	(Even though the technical memorandum forbids it)
+ * 2004-07-14	Christoph Lameter
+ *	Added getnstimeofday to allow the posix timer functions to return
+ *	with nanosecond accuracy
+ */
+#include
+#include
+
+#ifndef HAVE_JIFFIES64_TO_MSECS
+/* Copied from kernel/time/time.c */
+u64 jiffies64_to_msecs(const u64 j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (MSEC_PER_SEC / HZ) * j;
+#else
+	return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
+#endif
+}
+EXPORT_SYMBOL(jiffies64_to_msecs);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_vmscan.c b/drivers/gpu/drm/amd/amdkcl/kcl_vmscan.c
new file mode 100644
index 0000000000000..fb57e87ff981b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_vmscan.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ * Swap reorganised 29.12.95, Stephen Tweedie.
+ * kswapd added: 7.1.96  sct
+ * Removed kswapd_ctl limits, and swap out as many pages as needed
+ * to bring the system back to freepages.high: 2.4.97, Rik van Riel.
+ * Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com).
+ * Multiqueue VM started 5.8.00, Rik van Riel.
+ */
+#include
+
+#ifndef HAVE_SYNCHRONIZE_SHRINKERS
+static DECLARE_RWSEM(shrinker_rwsem);
+
+/**
+ * synchronize_shrinkers - Wait for all running shrinkers to complete.
+ *
+ * This is equivalent to calling unregister_shrinker() and register_shrinker(),
+ * but atomically and with less overhead. This is useful to guarantee that all
+ * shrinker invocations have seen an update, before freeing memory, similar to
+ * rcu.
+ */
+void synchronize_shrinkers(void)
+{
+	down_write(&shrinker_rwsem);
+	up_write(&shrinker_rwsem);
+}
+EXPORT_SYMBOL(synchronize_shrinkers);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_wbrf.c b/drivers/gpu/drm/amd/amdkcl/kcl_wbrf.c
new file mode 100644
index 0000000000000..3299b4e78c7a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_wbrf.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Frequency Band Management Interface
+ * Copyright (C) 2023 Advanced Micro Devices
+ */
+
+#include
+#include
+
+#ifndef HAVE_LINUX_ACPI_AMD_WBRF_H
+/*
+ * Functions bit vector for the WBRF method
+ *
+ * Bit 0: WBRF supported.
+ * Bit 1: Function 1 (Add / Remove frequency) is supported.
+ * Bit 2: Function 2 (Get frequency list) is supported.
+ */
+#define WBRF_ENABLED		0x0
+#define WBRF_RECORD		0x1
+#define WBRF_RETRIEVE		0x2
+
+#define WBRF_REVISION		0x1
+
+/*
+ * The data structure used for WBRF_RETRIEVE is not naturally aligned.
+ * Unfortunately, the design has already been settled.
+ */
+struct amd_wbrf_ranges_out {
+	u32			num_of_ranges;
+	struct freq_band_range	band_list[MAX_NUM_OF_WBRF_RANGES];
+} __packed;
+
+static const guid_t wifi_acpi_dsm_guid =
+	GUID_INIT(0x7b7656cf, 0xdc3d, 0x4c1c,
+		  0x83, 0xe9, 0x66, 0xe7, 0x21, 0xde, 0x30, 0x70);
+
+/*
+ * Used to notify consumers (currently only the amdgpu driver) about
+ * wifi frequency band changes.
+ */
+static BLOCKING_NOTIFIER_HEAD(wbrf_chain_head);
+
+static int wbrf_record(struct acpi_device *adev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+	union acpi_object argv4;
+	union acpi_object *tmp;
+	union acpi_object *obj;
+	u32 num_of_ranges = 0;
+	u32 num_of_elements;
+	u32 arg_idx = 0;
+	int ret;
+	u32 i;
+
+	if (!in)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(in->band_list); i++) {
+		if (in->band_list[i].start && in->band_list[i].end)
+			num_of_ranges++;
+	}
+
+	/*
+	 * The num_of_ranges value in the "in" object supplied by
+	 * the caller is required to be equal to the number of
+	 * entries in the band_list array in there.
+	 */
+	if (num_of_ranges != in->num_of_ranges)
+		return -EINVAL;
+
+	/*
+	 * Every input frequency band comes with two end points (start/end)
+	 * and each is accounted as an element. Meanwhile the range count
+	 * and action type are accounted as an element each.
+	 * So, the total element count = 2 * num_of_ranges + 1 + 1.
+	 */
+	num_of_elements = 2 * num_of_ranges + 2;
+
+	tmp = kcalloc(num_of_elements, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	argv4.package.type = ACPI_TYPE_PACKAGE;
+	argv4.package.count = num_of_elements;
+	argv4.package.elements = tmp;
+
+	/* save the number of ranges */
+	tmp[0].integer.type = ACPI_TYPE_INTEGER;
+	tmp[0].integer.value = num_of_ranges;
+
+	/* save the action (WBRF_RECORD_ADD/REMOVE/RETRIEVE) */
+	tmp[1].integer.type = ACPI_TYPE_INTEGER;
+	tmp[1].integer.value = action;
+
+	arg_idx = 2;
+	for (i = 0; i < ARRAY_SIZE(in->band_list); i++) {
+		if (!in->band_list[i].start || !in->band_list[i].end)
+			continue;
+
+		tmp[arg_idx].integer.type = ACPI_TYPE_INTEGER;
+		tmp[arg_idx++].integer.value = in->band_list[i].start;
+		tmp[arg_idx].integer.type = ACPI_TYPE_INTEGER;
+		tmp[arg_idx++].integer.value = in->band_list[i].end;
+	}
+
+	obj = acpi_evaluate_dsm(adev->handle, &wifi_acpi_dsm_guid,
+				WBRF_REVISION, WBRF_RECORD, &argv4);
+	if (!obj) {
+		/* don't leak tmp; ACPI_FREE() tolerates a NULL obj */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = obj->integer.value;
+	if (ret)
+		ret = -EINVAL;
+
+out:
+	ACPI_FREE(obj);
+	kfree(tmp);
+
+	return ret;
+}
+
+/**
+ * acpi_amd_wbrf_add_remove - add or remove the frequency band the device is using
+ *
+ * @dev: device pointer
+ * @action: remove or add the frequency band into bios
+ * @in: input structure containing the frequency band the device is using
+ *
+ * Broadcast to other consumers the frequency band the device starts
+ * to use. Underneath the surface the information is cached into an
+ * internal buffer first. Then a notification is sent to all registered
+ * consumers so that they can retrieve that buffer and learn the latest
+ * active frequency bands. Consumers that haven't yet been registered
+ * can retrieve the information from the cache when they register.
+ *
+ * Return:
+ * 0 on successfully adding/removing the wifi frequency band.
+ * Returns a negative error code for failure.
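+ *
+ * A hedged usage sketch for a producer, in comment form ("dev" is the
+ * producer's struct device, the frequencies are placeholder values in Hz,
+ * and WBRF_RECORD_ADD is the action constant referenced in wbrf_record()
+ * above):
+ *
+ *	struct wbrf_ranges_in_out in = {0};
+ *
+ *	in.num_of_ranges = 1;
+ *	in.band_list[0].start = 2400000000ULL;
+ *	in.band_list[0].end = 2500000000ULL;
+ *	ret = acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_ADD, &in);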
+ */
+int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in)
+{
+	struct acpi_device *adev;
+	int ret;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	ret = wbrf_record(adev, action, in);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(&wbrf_chain_head, WBRF_CHANGED, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_add_remove);
+
+/**
+ * acpi_amd_wbrf_supported_producer - determine if WBRF can be enabled
+ *				      for the device as a producer
+ *
+ * @dev: device pointer
+ *
+ * Check if the platform is equipped with the necessary implementations to
+ * support WBRF for the device as a producer.
+ *
+ * Return:
+ * true if WBRF is supported, otherwise returns false
+ */
+bool acpi_amd_wbrf_supported_producer(struct device *dev)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return false;
+
+	return acpi_check_dsm(adev->handle, &wifi_acpi_dsm_guid,
+			      WBRF_REVISION, BIT(WBRF_RECORD));
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_supported_producer);
+
+/**
+ * acpi_amd_wbrf_supported_consumer - determine if WBRF can be enabled
+ *				      for the device as a consumer
+ *
+ * @dev: device pointer
+ *
+ * Determine if the platform is equipped with the necessary implementations
+ * to support WBRF for the device as a consumer.
+ *
+ * Return:
+ * true if WBRF is supported, otherwise returns false.
+ */
+bool acpi_amd_wbrf_supported_consumer(struct device *dev)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return false;
+
+	return acpi_check_dsm(adev->handle, &wifi_acpi_dsm_guid,
+			      WBRF_REVISION, BIT(WBRF_RETRIEVE));
+}
+EXPORT_SYMBOL_GPL(acpi_amd_wbrf_supported_consumer);
+
+/**
+ * amd_wbrf_retrieve_freq_band - retrieve the current active frequency bands
+ *
+ * @dev: device pointer
+ * @out: output structure containing all the active frequency bands
+ *
+ * Retrieve the current active frequency bands which were broadcasted
+ * by other producers. The consumer who calls this API should take
+ * proper actions if any of the frequency bands may cause RFI with its
+ * own frequency band in use.
+ *
+ * Return:
+ * 0 if the active frequency bands were retrieved successfully.
+ * Returns a negative error code for failure.
+ */
+int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out)
+{
+	struct amd_wbrf_ranges_out acpi_out = {0};
+	struct acpi_device *adev;
+	union acpi_object *obj;
+	union acpi_object param;
+	int ret = 0;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	param.type = ACPI_TYPE_STRING;
+	param.string.length = 0;
+	param.string.pointer = NULL;
+
+	obj = acpi_evaluate_dsm(adev->handle, &wifi_acpi_dsm_guid,
+				WBRF_REVISION, WBRF_RETRIEVE, &param);
+	if (!obj)
+		return -EINVAL;
+
+	/*
+	 * The return buffer has variable length, in the format below:
+	 * number_of_entries (1 DWORD): Number of entries
+	 * start_freq of 1st entry (1 QWORD): Start frequency of the 1st entry
+	 * end_freq of 1st entry (1 QWORD): End frequency of the 1st entry
+	 * ...
+	 * ...
+	 * start_freq of the last entry (1 QWORD)
+	 * end_freq of the last entry (1 QWORD)
+	 *
+	 * Thus the buffer length is determined by the number of entries.
+	 * - For the zero entry scenario, the buffer length will be 4 bytes.
+	 * - For the one entry scenario, the buffer length will be 20 bytes.
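+	 *   (That is, 4 bytes for the number of entries plus one
+	 *   start/end QWORD pair: 4 + 2 * 8 = 20.)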
+	 */
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (obj->buffer.length > sizeof(acpi_out) || obj->buffer.length < 4) {
+		dev_err(dev, "Wrong sized WBRF information\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	memcpy(&acpi_out, obj->buffer.pointer, obj->buffer.length);
+
+	out->num_of_ranges = acpi_out.num_of_ranges;
+	memcpy(out->band_list, acpi_out.band_list, sizeof(acpi_out.band_list));
+
+out:
+	ACPI_FREE(obj);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_retrieve_freq_band);
+
+/**
+ * amd_wbrf_register_notifier - register for notifications of frequency
+ *                              band updates
+ *
+ * @nb: driver notifier block
+ *
+ * The consumer should register itself via this API so that it gets
+ * notified of frequency band updates from the producers.
+ *
+ * Return:
+ * 0 on successfully registering the consumer driver.
+ * Returns a negative error code on failure.
+ */
+int amd_wbrf_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&wbrf_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_register_notifier);
+
+/**
+ * amd_wbrf_unregister_notifier - unregister from notifications of
+ *                                frequency band updates
+ *
+ * @nb: driver notifier block
+ *
+ * The consumer should call this API when it is no longer interested in
+ * the frequency band updates from the producers. Usually, this is done
+ * during driver cleanup.
+ *
+ * Return:
+ * 0 on successfully unregistering the consumer driver.
+ * Returns a negative error code on failure.
+ */
+int amd_wbrf_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&wbrf_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(amd_wbrf_unregister_notifier);
+#endif /* HAVE_LINUX_ACPI_AMD_WBRF_H */
diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_workqueue.c b/drivers/gpu/drm/amd/amdkcl/kcl_workqueue.c
new file mode 100644
index 0000000000000..7c9b248df0e27
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/kcl_workqueue.c
@@ -0,0 +1,46 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/workqueue.h>
+
+#ifndef HAVE_CANCEL_WORK
+static bool (*_kcl_cancel_work)(struct work_struct *work, bool is_dwork);
+
+static bool _kcl_cancel_work_stub(struct work_struct *work, bool is_dwork)
+{
+	pr_warn_once("cancel_work function is not supported\n");
+	return false;
+}
+
+bool kcl_cancel_work(struct work_struct *work)
+{
+	return _kcl_cancel_work(work, false);
+}
+EXPORT_SYMBOL(kcl_cancel_work);
+#endif
+
+void amdkcl_workqueue_init(void)
+{
+#ifndef HAVE_CANCEL_WORK
+	/* Resolve __cancel_work at runtime, falling back to the stub */
+	_kcl_cancel_work = amdkcl_fp_setup("__cancel_work", _kcl_cancel_work_stub);
+#endif /* HAVE_CANCEL_WORK */
+}
+
diff --git a/drivers/gpu/drm/amd/amdkcl/main.c b/drivers/gpu/drm/amd/amdkcl/main.c
new file mode 100644
index 0000000000000..e02c5db0eb328
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkcl/main.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+#include <linux/module.h>
+#include <linux/init.h>
+
+extern void amdkcl_dev_cgroup_init(void);
+extern void amdkcl_fence_init(void);
+extern void amdkcl_io_init(void);
+extern void amdkcl_mm_init(void);
+extern void amdkcl_suspend_init(void);
+extern void amdkcl_numa_init(void);
+extern void amdkcl_workqueue_init(void);
+extern void amdkcl_prime_init(void);
+
+int __init amdkcl_init(void)
+{
+	amdkcl_dev_cgroup_init();
+	amdkcl_fence_init();
+	amdkcl_io_init();
+	amdkcl_mm_init();
+	amdkcl_suspend_init();
+	amdkcl_numa_init();
+	amdkcl_workqueue_init();
+	amdkcl_prime_init();
+
+	return 0;
+}
+module_init(amdkcl_init);
+
+void __exit amdkcl_exit(void)
+{
+
+}
+
+module_exit(amdkcl_exit);
+
+MODULE_AUTHOR("AMD linux driver team");
+MODULE_DESCRIPTION("Module for the OS kernel compatibility layer");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d3d8972240da..ede9e5afa301d 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -59,7 +59,12 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_int_process_v11.o \
 		$(AMDKFD_PATH)/kfd_smi_events.o \
 		$(AMDKFD_PATH)/kfd_crat.o \
-		$(AMDKFD_PATH)/kfd_debug.o
+		$(AMDKFD_PATH)/kfd_peerdirect.o \
+		$(AMDKFD_PATH)/kfd_ipc.o \
+		$(AMDKFD_PATH)/kfd_trace.o \
+		$(AMDKFD_PATH)/kfd_spm.o \
+		$(AMDKFD_PATH)/kfd_debug.o \
+		$(AMDKFD_PATH)/kfd_pc_sampling.o
 
 ifneq ($(CONFIG_DEBUG_FS),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 02f7ba8c93cd4..af92680597b22 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,27 +274,29 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-	0xbf820001, 0xbf820258,
+	0xbf820001, 0xbf820267,
 	0xb8f8f802, 0x8978ff78,
 	0x00020006, 0xb8fbf803,
 	0x866eff78, 0x00002000,
-	0xbf840009, 0x866eff6d,
-	0x00ff0000, 0xbf85001e,
+	0xbf840008, 0xbf0d986d,
+	0xbf850023, 0x866eff7b,
+	0x00000400, 0xbf850065,
+	0xbf8e0010, 0xb8fbf803,
+	0xbf82fffa, 0x866eff7b,
+	0x03800900, 0xbf850019,
+	0x866eff7b, 0x000071ff,
+	0xbf840008, 0x866fff7b,
+	0x00007080, 0xbf840001,
+	0xbeee1a87, 0xb8eff801,
+	0x8e6e8c6e, 0x866e6f6e,
+	0xbf85000e, 0xbf0d986d,
+	0xbf850003, 0x866eff6d,
+	0x00ff0000, 0xbf850009,
+	0xb8eef801, 0x866eff6e,
+	0x00000800, 0xbf850005,
+	0xbf0d986d, 0xbf850004,
 	0x866eff7b, 0x00000400,
-	0xbf850055, 0xbf8e0010,
-	0xb8fbf803, 0xbf82fffa,
-	0x866eff7b, 0x03c00900,
-	0xbf850015, 0x866eff7b,
-	0x000071ff, 0xbf840008,
-	0x866fff7b, 0x00007080,
-	0xbf840001, 0xbeee1a87,
-	0xb8eff801, 0x8e6e8c6e,
-
0x866e6f6e, 0xbf85000a, - 0x866eff6d, 0x00ff0000, - 0xbf850007, 0xb8eef801, - 0x866eff6e, 0x00000800, - 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0xbf850046, 0xbeed1a9d, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -303,228 +305,190 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031e7d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, - 0xbf840001, 0xbe801d6e, - 0x866eff6d, 0x01ff0000, - 0xbf850005, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbf820005, - 0x866eff6d, 0x01000000, - 0xbf850002, 0x806c846c, - 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f7a8b77, - 0x867aff7a, 0x001f8000, - 0xb97af807, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8378, - 0xb96ee0c2, 0xbf800002, - 0xb9780002, 0xbe801f6c, + 0xbf8cc07f, 0x8e799779, + 0x8977ff77, 0x01800000, + 0x87777977, 0xbf0d986d, + 0xbf840009, 0xbf0d9877, + 0xbf850007, 0x896dff6d, + 0x01ff0000, 0xba7f0583, + 0x00000000, 0xbf0d9d6d, + 0xbeed189d, 0xbf840012, + 0xbef71898, 0xbeed189d, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, - 0xbefa0080, 0xb97a0283, - 0xb8faf807, 0x867aff7a, - 0x001f8000, 0x8e7a8b7a, - 0x8977ff77, 0xfc000000, - 0x87777a77, 0xba7ff807, - 0x00000000, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x877a8478, - 0xb97af802, 0xbf8e0002, - 0xbf88fffe, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0xbef1007c, - 0xbef00080, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, + 0x8f7a8b77, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8977ff77, + 0xfc000000, 0x87777a77, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0xbef1007c, 0xbef00080, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, 0xbefe007c, 0xbefc0070, - 0xc0611b3a, 0x0000007c, + 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b7a, + 0xbefc0070, 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bba, 0x0000007c, + 0xc0611b7a, 0x0000007c, 
0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bfa, + 0xbefc0070, 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8fbf803, - 0xbefe007c, 0xbefc0070, - 0xc0611efa, 0x0000007c, + 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a3a, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8f1f801, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, + 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, - 0xbef600ff, 0x01000000, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf85004d, 0xbe840080, - 0xd2890000, 0x00000900, - 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, - 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000902, + 0xd2890000, 0x00000901, 0x80048104, 0xd2890001, - 0x00000902, 0x80048104, - 0xd2890002, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, 0x80048104, 0xd2890003, - 0x00000902, 0x80048104, + 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000903, 0x80048104, - 0xd2890001, 0x00000903, + 0x00000902, 0x80048104, + 
0xd2890001, 0x00000902, 0x80048104, 0xd2890002, - 0x00000903, 0x80048104, - 0xd2890003, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbf820008, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf840063, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf84005f, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, - 0x867aff78, 0x00400000, - 0xbf850003, 0xb8faf803, - 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, - 0xd2890000, 0x00000900, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, 0x80048104, 0xd2890001, - 0x00000900, 0x80048104, - 0xd2890002, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, 0x80048104, 0xd2890003, - 0x00000900, 0x80048104, + 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000901, 0x80048104, - 0xd2890001, 0x00000901, - 0x80048104, 0xd2890002, - 0x00000901, 0x80048104, - 0xd2890003, 0x00000901, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840063, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf84005f, + 0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -544,137 +508,181 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 
0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbf8200c7, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001e, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf840019, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xbf8c0f70, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200c7, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf840019, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbefe00c1, 0xbeff00c1, 0xbef600ff, 0x01000000, - 0xbefc006f, 
0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xbf8c0f70, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xc0211bfa, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, 0x00000078, 0x80788478, - 0xc0211b3a, 0x00000078, - 0x80788478, 0xc0211b7a, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, 0x00000078, 0x80788478, - 0xc0211c3a, 0x00000078, - 0x80788478, 0xc0211c7a, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, 0x00000078, 0x80788478, - 0xc0211eba, 0x00000078, - 0x80788478, 0xc0211efa, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, 0x00000078, 0x80788478, - 0xc0211a3a, 0x00000078, - 0x80788478, 0xc0211a7a, + 0xc0211a7a, 0x00000078, + 0x80788478, 0xc0211cfa, 0x00000078, 0x80788478, - 0xc0211cfa, 0x00000078, - 0x80788478, 0xbf8cc07f, - 0xbefc006f, 0xbefe0070, - 0xbeff0071, 0x866f7bff, - 0x000003ff, 0xb96f4803, - 0x866f7bff, 0xfffff800, - 0x8f6f8b6f, 0xb96fa2c3, - 0xb973f801, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1302,27 +1310,29 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { }; static const uint32_t cwsr_trap_arcturus_hex[] = { - 0xbf820001, 0xbf8202d4, + 0xbf820001, 0xbf8202e3, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840009, 0x866eff6d, - 0x00ff0000, 0xbf85001e, + 0xbf840008, 0xbf0d986d, + 0xbf850023, 0x866eff7b, + 0x00000400, 0xbf850065, + 
0xbf8e0010, 0xb8fbf803, + 0xbf82fffa, 0x866eff7b, + 0x03800900, 0xbf850019, + 0x866eff7b, 0x000071ff, + 0xbf840008, 0x866fff7b, + 0x00007080, 0xbf840001, + 0xbeee1a87, 0xb8eff801, + 0x8e6e8c6e, 0x866e6f6e, + 0xbf85000e, 0xbf0d986d, + 0xbf850003, 0x866eff6d, + 0x00ff0000, 0xbf850009, + 0xb8eef801, 0x866eff6e, + 0x00000800, 0xbf850005, + 0xbf0d986d, 0xbf850004, 0x866eff7b, 0x00000400, - 0xbf850055, 0xbf8e0010, - 0xb8fbf803, 0xbf82fffa, - 0x866eff7b, 0x03c00900, - 0xbf850015, 0x866eff7b, - 0x000071ff, 0xbf840008, - 0x866fff7b, 0x00007080, - 0xbf840001, 0xbeee1a87, - 0xb8eff801, 0x8e6e8c6e, - 0x866e6f6e, 0xbf85000a, - 0x866eff6d, 0x00ff0000, - 0xbf850007, 0xb8eef801, - 0x866eff6e, 0x00000800, - 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0xbf850046, 0xbeed1a9d, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -1331,187 +1341,130 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031e7d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, - 0xbf840001, 0xbe801d6e, - 0x866eff6d, 0x01ff0000, - 0xbf850005, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbf820005, - 0x866eff6d, 0x01000000, - 0xbf850002, 0x806c846c, - 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f7a8b77, - 0x867aff7a, 0x001f8000, - 0xb97af807, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8378, - 0xb96ee0c2, 0xbf800002, - 0xb9780002, 0xbe801f6c, + 0xbf8cc07f, 0x8e799779, + 0x8977ff77, 0x01800000, + 0x87777977, 0xbf0d986d, + 0xbf840009, 0xbf0d9877, + 0xbf850007, 0x896dff6d, + 0x01ff0000, 0xba7f0583, + 0x00000000, 0xbf0d9d6d, + 0xbeed189d, 0xbf840012, + 0xbef71898, 0xbeed189d, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, - 0xbefa0080, 0xb97a0283, - 0xb8faf807, 0x867aff7a, - 0x001f8000, 0x8e7a8b7a, - 0x8977ff77, 0xfc000000, - 0x87777a77, 0xba7ff807, - 0x00000000, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x877a8478, - 0xb97af802, 0xbf8e0002, - 0xbf88fffe, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0x8e7a817a, 0xb8fb1605, - 0x807b817b, 0x8e7b867b, - 0x807a7b7a, 0x807a7e7a, - 0x827b807f, 0x867bff7b, - 0x0000ffff, 0xc04b1c3d, - 0x00000050, 0xbf8cc07f, - 0xc04b1d3d, 0x00000060, - 0xbf8cc07f, 0xc0431e7d, - 0x00000074, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0xbef1007c, 0xbef00080, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, + 0x8f7a8b77, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8977ff77, + 0xfc000000, 0x87777a77, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 
0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, 0xbefe007c, 0xbefc0070, - 0xc0611b3a, 0x0000007c, + 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b7a, + 0xbefc0070, 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bba, 0x0000007c, + 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bfa, + 0xbefc0070, 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8fbf803, - 0xbefe007c, 0xbefc0070, - 0xc0611efa, 0x0000007c, + 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a3a, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8f1f801, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, + 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 
0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -1530,31 +1483,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2a05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -1574,235 +1546,281 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 
0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xbefc0080, - 0xbf11017c, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850059, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xbefc0080, 0xbf11017c, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850059, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xbe840080, + 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, + 0x00000900, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, + 0xd2890000, 0x00000902, 0x80048104, 0xd2890001, - 
0x00000903, 0x80048104, - 0xd2890002, 0x00000903, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, + 0x00000902, 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffa9, 0xbf9c0000, - 0xbf820016, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffeb, - 0xbf9c0000, 0xbf8200e3, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x866eff7f, 0x04000000, - 0xbf84001f, 0xbefe00c1, - 0xbeff00c1, 0xb8ef4306, - 0x866fc16f, 0xbf84001a, - 0x8e6f866f, 0x8e6f826f, - 0xbef6006f, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, - 0xbefe00c1, 0xbeff00c1, - 0xbef600ff, 0x01000000, - 0xb8ef2a05, 0x806f816f, - 0x8e6f826f, 0x806fff6f, - 0x00008000, 0xbef80080, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefc0084, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffee, 0xbefc0080, - 0xbf11087c, 0xe0524000, - 0x781d0000, 0xe0524100, - 0x781d0100, 0xe0524200, - 0x781d0200, 0xe0524300, - 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, - 0x807c847c, 0x8078ff78, - 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82a05, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffa9, + 0xbf9c0000, 0xbf820016, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffeb, 0xbf9c0000, + 0xbf8200e3, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x866eff7f, + 0x04000000, 0xbf84001f, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef4306, 0x866fc16f, + 0xbf84001a, 0x8e6f866f, + 0x8e6f826f, 0xbef6006f, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, 
0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbefe00c1, + 0xbeff00c1, 0xbef600ff, + 0x01000000, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x806fff6f, 0x00008000, + 0xbef80080, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { - 0xbf820001, 0xbf8202df, + 0xbf820001, 0xbf8202ee, 
0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840009, 0x866eff6d, - 0x00ff0000, 0xbf85001e, + 0xbf840008, 0xbf0d986d, + 0xbf850023, 0x866eff7b, + 0x00000400, 0xbf850065, + 0xbf8e0010, 0xb8fbf803, + 0xbf82fffa, 0x866eff7b, + 0x03800900, 0xbf850019, + 0x866eff7b, 0x000071ff, + 0xbf840008, 0x866fff7b, + 0x00007080, 0xbf840001, + 0xbeee1a87, 0xb8eff801, + 0x8e6e8c6e, 0x866e6f6e, + 0xbf85000e, 0xbf0d986d, + 0xbf850003, 0x866eff6d, + 0x00ff0000, 0xbf850009, + 0xb8eef801, 0x866eff6e, + 0x00000800, 0xbf850005, + 0xbf0d986d, 0xbf850004, 0x866eff7b, 0x00000400, - 0xbf850055, 0xbf8e0010, - 0xb8fbf803, 0xbf82fffa, - 0x866eff7b, 0x03c00900, - 0xbf850015, 0x866eff7b, - 0x000071ff, 0xbf840008, - 0x866fff7b, 0x00007080, - 0xbf840001, 0xbeee1a87, - 0xb8eff801, 0x8e6e8c6e, - 0x866e6f6e, 0xbf85000a, - 0x866eff6d, 0x00ff0000, - 0xbf850007, 0xb8eef801, - 0x866eff6e, 0x00000800, - 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0xbf850046, 0xbeed1a9d, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8977ff77, 0xfc000000, @@ -1811,187 +1829,130 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8977ff77, 0x00800000, - 0x87776e77, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031e7d, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, - 0xbf840001, 0xbe801d6e, - 0x866eff6d, 0x01ff0000, - 0xbf850005, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbf820005, - 0x866eff6d, 0x01000000, - 0xbf850002, 0x806c846c, - 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f7a8b77, - 0x867aff7a, 0x001f8000, - 0xb97af807, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8378, - 0xb96ee0c2, 0xbf800002, - 0xb9780002, 0xbe801f6c, + 0xbf8cc07f, 0x8e799779, + 0x8977ff77, 0x01800000, + 0x87777977, 0xbf0d986d, + 0xbf840009, 0xbf0d9877, + 0xbf850007, 0x896dff6d, + 0x01ff0000, 0xba7f0583, + 0x00000000, 0xbf0d9d6d, + 0xbeed189d, 0xbf840012, + 0xbef71898, 0xbeed189d, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, - 0xbefa0080, 0xb97a0283, - 0xb8faf807, 0x867aff7a, - 0x001f8000, 0x8e7a8b7a, - 0x8977ff77, 0xfc000000, - 0x87777a77, 0xba7ff807, - 0x00000000, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x877a8478, - 0xb97af802, 0xbf8e0002, - 0xbf88fffe, 0xb8fa2985, - 0x807a817a, 0x8e7a8a7a, - 0x8e7a817a, 0xb8fb1605, - 0x807b817b, 0x8e7b867b, - 0x807a7b7a, 0x807a7e7a, - 0x827b807f, 0x867bff7b, - 0x0000ffff, 0xc04b1c3d, - 0x00000050, 0xbf8cc07f, - 0xc04b1d3d, 0x00000060, - 0xbf8cc07f, 0xc0431e7d, - 0x00000074, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0xbef1007c, 0xbef00080, - 0xb8f02985, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, + 0x8f7a8b77, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8977ff77, + 0xfc000000, 0x87777a77, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 
0xbf88fffe, + 0xb8fa2985, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, 0xbefe007c, 0xbefc0070, - 0xc0611b3a, 0x0000007c, + 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b7a, + 0xbefc0070, 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bba, 0x0000007c, + 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bfa, + 0xbefc0070, 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8fbf803, - 0xbefe007c, 0xbefc0070, - 0xc0611efa, 0x0000007c, + 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a3a, + 0xbefc0070, 0xc0611e3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, - 0xbefe007c, 0xbefc0070, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8f1f801, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 
0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2010,31 +1971,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -2054,51 +2034,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 
0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -2137,139 +2097,203 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 
0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 
0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b77, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -3151,25 +3175,27 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { }; static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { - 0xbf820001, 0xbf8202db, + 0xbf820001, 0xbf8202ea, 0xb8f8f802, 0x8978ff78, 0x00020006, 0xb8fbf803, 0x866eff78, 0x00002000, - 0xbf840009, 0x866eff6d, - 0x00ff0000, 0xbf85001a, + 0xbf840008, 0xbf0d986d, + 0xbf85001f, 0x866eff7b, + 0x00000400, 0xbf850061, + 0xbf8e0010, 0xb8fbf803, + 0xbf82fffa, 0x866eff7b, + 0x03800900, 0xbf850015, + 0x866eff7b, 0x000071ff, + 0xbf840008, 0x866fff7b, + 0x00007080, 0xbf840001, + 0xbeee1a87, 0xb8eff801, + 0x8e6e8c6e, 
0x866e6f6e, + 0xbf85000a, 0xbf0d986d, + 0xbf850003, 0x866eff6d, + 0x00ff0000, 0xbf850005, + 0xbf0d986d, 0xbf850004, 0x866eff7b, 0x00000400, - 0xbf850051, 0xbf8e0010, - 0xb8fbf803, 0xbf82fffa, - 0x866eff7b, 0x03c00900, - 0xbf850011, 0x866eff7b, - 0x000071ff, 0xbf840008, - 0x866fff7b, 0x00007080, - 0xbf840001, 0xbeee1a87, - 0xb8eff801, 0x8e6e8c6e, - 0x866e6f6e, 0xbf850006, - 0x866eff6d, 0x00ff0000, - 0xbf850003, 0x866eff7b, - 0x00000400, 0xbf85003a, + 0xbf850046, 0xbeed1a9d, 0xb8faf807, 0x867aff7a, 0x001f8000, 0x8e7a8b7a, 0x8979ff79, 0xfc000000, @@ -3178,187 +3204,130 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xb8fbf813, 0x8efa887a, 0xbf0d8f7b, 0xbf840002, 0x877bff7b, 0xffff0000, - 0xc0031bbd, 0x00000010, - 0xbf8cc07f, 0x8e6e976e, - 0x8979ff79, 0x00800000, - 0x87796e79, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, + 0xc0031cfd, 0x00000010, + 0xc0071bbd, 0x00000000, 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, - 0xbf840001, 0xbe801d6e, - 0x866eff6d, 0x01ff0000, - 0xbf850005, 0x8778ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbf820005, - 0x866eff6d, 0x01000000, - 0xbf850002, 0x806c846c, - 0x826d806d, 0x866dff6d, - 0x0000ffff, 0x8f7a8b79, - 0x867aff7a, 0x001f8000, - 0xb97af807, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8378, - 0xb96ee0c2, 0xbf800002, - 0xb9780002, 0xbe801f6c, + 0xbf8cc07f, 0x8e739773, + 0x8979ff79, 0x01800000, + 0x87797379, 0xbf0d986d, + 0xbf840009, 0xbf0d9879, + 0xbf850007, 0x896dff6d, + 0x01ff0000, 0xba7f0583, + 0x00000000, 0xbf0d9d6d, + 0xbeed189d, 0xbf840012, + 0xbef91898, 0xbeed189d, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, - 0xbefa0080, 0xb97a0283, - 0xb8faf807, 0x867aff7a, - 0x001f8000, 0x8e7a8b7a, - 0x8979ff79, 0xfc000000, - 0x87797a79, 0xba7ff807, - 0x00000000, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x877a8478, - 0xb97af802, 0xbf8e0002, - 0xbf88fffe, 0xb8fa2985, - 0x807a817a, 0x8e7a8a7a, - 0x8e7a817a, 0xb8fb1605, - 0x807b817b, 0x8e7b867b, - 0x807a7b7a, 0x807a7e7a, - 0x827b807f, 0x867bff7b, - 0x0000ffff, 0xc04b1c3d, - 0x00000050, 0xbf8cc07f, - 0xc04b1d3d, 0x00000060, - 0xbf8cc07f, 0xc0431e7d, - 0x00000074, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0xbef1007c, 0xbef00080, - 0xb8f02985, 0x80708170, - 0x8e708a70, 0x8e708170, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, + 0x8f7a8b79, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8979ff79, + 0xfc000000, 0x87797a79, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2985, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, 
+ 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, 0xbefe007c, 0xbefc0070, - 0xc0611b3a, 0x0000007c, + 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b7a, + 0xbefc0070, 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bba, 0x0000007c, + 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bfa, + 0xbefc0070, 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8fbf803, - 0xbefe007c, 0xbefc0070, - 0xc0611efa, 0x0000007c, + 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a3a, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8f1f801, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, + 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fb1605, - 0x807b817b, 0x8e7b847b, - 0x8e76827b, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747074, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a7b7c, - 0xbf85ffe7, 0xbef40172, - 0xbef00080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0x867aff78, - 0x00400000, 0xbf850003, - 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf85004d, - 0xbe840080, 0xd2890000, - 0x00000900, 0x80048104, - 0xd2890001, 0x00000900, - 0x80048104, 0xd2890002, - 0x00000900, 0x80048104, - 0xd2890003, 0x00000900, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000901, - 0x80048104, 0xd2890001, - 0x00000901, 0x80048104, - 0xd2890002, 0x00000901, - 0x80048104, 0xd2890003, - 0x00000901, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 0xbf820008, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8fb4306, - 0x867bc17b, 0xbf840064, - 0xbf8a0000, 0x867aff6f, - 0x04000000, 0xbf840060, - 0x8e7b867b, 0x8e7b827b, - 0xbef6007b, 0xb8f02985, - 0x80708170, 0x8e708a70, - 0x8e708170, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 
+ 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850030, 0x24040682, - 0xd86e4000, 0x00000002, - 0xbf8cc07f, 0xbe840080, + 0xbf85004d, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -3377,31 +3346,50 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x80048104, 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, - 0xbf84ffee, 0x680404ff, - 0x00000200, 0xd0c9006a, - 0x0000f702, 0xbf87ffd2, - 0xbf820015, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x701d0002, 0x68040702, - 0xd0c9006a, 0x0000f702, - 0xbf87fff7, 0xbef70000, - 0xbef000ff, 0x00000400, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8fb2b05, 0x807b817b, - 0x8e7b827b, 0xbef600ff, - 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf84006d, - 0xbf11017c, 0x807bff7b, - 0x00001000, 0x867aff78, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, - 0x10000000, 0xbf850051, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, @@ -3421,51 +3409,31 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0xc069003a, 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, - 0xbe840080, 0xd2890000, - 0x00000902, 0x80048104, - 0xd2890001, 0x00000902, - 0x80048104, 0xd2890002, - 0x00000902, 0x80048104, - 0xd2890003, 0x00000902, - 0x80048104, 0xc069003a, - 0x00000070, 0xbf8cc07f, - 0x80709070, 0xbf06c004, - 0xbf84ffee, 0xbe840080, - 0xd2890000, 0x00000903, - 0x80048104, 0xd2890001, - 0x00000903, 0x80048104, - 0xd2890002, 0x00000903, - 0x80048104, 0xd2890003, - 0x00000903, 0x80048104, - 0xc069003a, 0x00000070, - 0xbf8cc07f, 0x80709070, - 0xbf06c004, 0xbf84ffee, - 
0x807c847c, 0xbf0a7b7c, - 0xbf85ffb1, 0xbf9c0000, - 0xbf820012, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0xe0724000, - 0x701d0000, 0xe0724100, - 0x701d0100, 0xe0724200, - 0x701d0200, 0xe0724300, - 0x701d0300, 0x807c847c, - 0x8070ff70, 0x00000400, - 0xbf0a7b7c, 0xbf85ffef, - 0xbf9c0000, 0xb8fb2985, - 0x807b817b, 0x8e7b837b, - 0xb8fa2b05, 0x807a817a, - 0x8e7a827a, 0x80fb7a7b, - 0x867b7b7b, 0xbf84007a, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, 0x807bff7b, 0x00001000, - 0xbefc0080, 0xbf11017c, 0x867aff78, 0x00400000, 0xbf850003, 0xb8faf803, 0x897a7aff, 0x10000000, - 0xbf850059, 0xd3d84000, - 0x18000100, 0xd3d84001, - 0x18000101, 0xd3d84002, - 0x18000102, 0xd3d84003, - 0x18000103, 0xbe840080, + 0xbf850051, 0xbe840080, 0xd2890000, 0x00000900, 0x80048104, 0xd2890001, 0x00000900, 0x80048104, @@ -3504,139 +3472,203 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x00000070, 0xbf8cc07f, 0x80709070, 0xbf06c004, 0xbf84ffee, 0x807c847c, - 0xbf0a7b7c, 0xbf85ffa9, - 0xbf9c0000, 0xbf820016, - 0xd3d84000, 0x18000100, - 0xd3d84001, 0x18000101, - 0xd3d84002, 0x18000102, - 0xd3d84003, 0x18000103, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, 0x701d0100, 0xe0724200, 0x701d0200, 0xe0724300, 0x701d0300, 0x807c847c, 0x8070ff70, 0x00000400, 0xbf0a7b7c, - 0xbf85ffeb, 0xbf9c0000, - 0xbf8200ee, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x04000000, 0xbf84001f, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 
0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf84001a, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82985, 0x80788178, - 0x8e788a78, 0x8e788178, - 0xb8ee1605, 0x806e816e, - 0x8e6e866e, 0x80786e78, - 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xe0510000, - 0x781d0000, 0xe0510100, - 0x781d0000, 0x807cff7c, - 0x00000200, 0x8078ff78, - 0x00000200, 0xbf0a6f7c, - 0xbf85fff6, 0xbefe00c1, - 0xbeff00c1, 0xbef600ff, - 0x01000000, 0xb8ef2b05, - 0x806f816f, 0x8e6f826f, - 0x806fff6f, 0x00008000, - 0xbef80080, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xb8ef2985, 0x806f816f, - 0x8e6f836f, 0xb8f92b05, - 0x80798179, 0x8e798279, - 0x80ef796f, 0x866f6f6f, - 0xbf84001a, 0x806fff6f, - 0x00008000, 0xbefc0080, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, 0xbf11087c, 0xe0524000, 0x781d0000, 0xe0524100, 0x781d0100, 0xe0524200, 0x781d0200, 0xe0524300, 0x781d0300, 0xbf8c0f70, - 0xd3d94000, 0x18000100, - 0xd3d94001, 0x18000101, - 0xd3d94002, 0x18000102, - 0xd3d94003, 0x18000103, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, 0x807c847c, 0x8078ff78, 0x00000400, 0xbf0a6f7c, - 0xbf85ffea, 0xbf9c0000, - 0xe0524000, 0x6e1d0000, - 0xe0524100, 0x6e1d0100, - 0xe0524200, 0x6e1d0200, - 0xe0524300, 0x6e1d0300, - 0xbf8c0f70, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x80f8c078, - 0xb8ef1605, 0x806f816f, - 0x8e6f846f, 0x8e76826f, - 0xbef600ff, 0x01000000, - 0xbefc006f, 0xc031003a, - 0x00000078, 0x80f8c078, - 0xbf8cc07f, 0x80fc907c, - 0xbf800000, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff0, 0xb8f82985, - 0x80788178, 0x8e788a78, - 0x8e788178, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 
0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe0070, 0xbeff0071, - 0x866f7bff, 0x000003ff, - 0xb96f4803, 0x866f7bff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2985, 0x806e816e, - 0x8e6e8a6e, 0x8e6e816e, - 0xb8ef1605, 0x806f816f, - 0x8e6f866f, 0x806e6f6e, - 0x806e746e, 0x826f8075, - 0x866fff6f, 0x0000ffff, - 0xc00b1c37, 0x00000050, - 0xc00b1d37, 0x00000060, - 0xc0031e77, 0x00000074, - 0xbf8cc07f, 0x8f6e8b79, - 0x866eff6e, 0x001f8000, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e837a, - 0xb96ee0c2, 0xbf800002, - 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf9b0000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx12_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index bb26338204f4b..e5887e58c3374 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -83,6 +83,7 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 var SQ_WAVE_TRAPSTS_HOST_TRAP_MASK = 0x400000 +var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT = 22 var SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK = 0x800000 var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x1000000 var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x2000000 @@ -108,12 +109,21 @@ var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI deb var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23 var TTMP_DEBUG_TRAP_ENABLED_MASK = 0x800000 +var TTMP_HOST_TRAP_ENABLED_SHIFT = 24 +var TTMP_HOST_TRAP_ENABLED_MASK = 0x1000000 +var TTMP_FEATURES_ENABLED_FLAGS_SHIFT = TTMP_DEBUG_TRAP_ENABLED_SHIFT +var TTMP_FEATURES_ENABLED_FLAGS_MASK = TTMP_DEBUG_TRAP_ENABLED_MASK | TTMP_HOST_TRAP_ENABLED_MASK /* 
Save */

var S_SAVE_BUF_RSRC_WORD1_STRIDE	= 0x00040000	//stride is 4 bytes
var S_SAVE_BUF_RSRC_WORD3_MISC		= 0x00807FAC	//SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_PC_HI_TRAP_ID_MASK		= 0x00FF0000
var S_SAVE_PC_HI_HT_MASK		= 0x01000000
+var S_SAVE_PC_HI_HT_SHIFT		= 24
+var S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP = 29	// Only used by the 1st level trap handler to remember if
+						// we saw a trap type that the driver could not mask, so that
+						// we can still go to the 2nd-level handler even when another,
+						// simultaneous host trap is driver-masked.
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	= 0x04000000	//bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	= 26

@@ -139,9 +149,15 @@ var s_save_m0 = ttmp5
var s_save_ttmps_lo	= s_save_tmp		//no conflict
var s_save_ttmps_hi	= s_save_trapsts	//no conflict
#if ASIC_FAMILY >= CHIP_GC_9_4_3
-var s_save_ib_sts	= ttmp13
+var s_save_ib_sts	= ttmp13	// bits 31:26 hold IB_STS, bit 23 to hold debug flag to 2nd-level,
+					// bit 24 to hold host-trap request,
+					// so bits 22:0 are available for stashing next variable's backup.
+var s_tma_flags		= ttmp7		// free
#else
-var s_save_ib_sts	= ttmp11
+var s_save_ib_sts	= ttmp11	// bits 31:26 hold IB_STS, bit 23 to hold debug flag to 2nd-level,
+					// bit 24 to hold host-trap request, bit 6 is no-scratch, bits 5-0 are wave-in-wg,
+					// so bits 22:7 are available for stashing next variable's backup.
+var s_tma_flags		= ttmp13	// free
#endif

/* Restore */

@@ -210,8 +226,8 @@ L_SKIP_RESTORE:

L_HALTED:
	// Host trap may occur while wave is halted.
-	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
-	s_cbranch_scc1	L_FETCH_2ND_TRAP
+	s_bitcmp1_b32	s_save_pc_hi, S_SAVE_PC_HI_HT_SHIFT
+	s_cbranch_scc1	L_FETCH_2ND_TRAP_DRIVER_MASKABLE

L_CHECK_SAVE:
	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save

@@ -229,12 +245,11 @@ L_NOT_HALTED:
	// Any concurrent SAVECTX will be handled upon re-entry once halted.

	// Check non-maskable exceptions. memory_violation, illegal_instruction
-	// and debugger (host trap, wave start/end, trap after instruction)
-	// exceptions always cause the wave to enter the trap handler.
+	// and debugger (wave start/end, trap after instruction) exceptions always
+	// cause the wave to enter the trap handler.
	s_and_b32	ttmp2, s_save_trapsts, \
		SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | \
		SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK | \
-		SQ_WAVE_TRAPSTS_HOST_TRAP_MASK | \
		SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK | \
		SQ_WAVE_TRAPSTS_WAVE_END_MASK | \
		SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK

@@ -257,9 +272,15 @@ L_NOT_ADDR_WATCH:
	s_cbranch_scc1	L_FETCH_2ND_TRAP

L_CHECK_TRAP_ID:
-	// Check trap_id != 0
+	// Check trap_id != 0. If this is a host trap (ttmp1.HT == 1), trap_id is
+	// non-zero, but we defer that part of the check until later as this exception
+	// is driver-maskable. We need to make sure that all non-driver-maskable
+	// exceptions are accounted for before checking for driver-maskable ones.
+	s_bitcmp1_b32	s_save_pc_hi, S_SAVE_PC_HI_HT_SHIFT
+	s_cbranch_scc1	L_SKIP_CHECK_TRAP_ID
	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
	s_cbranch_scc1	L_FETCH_2ND_TRAP
+L_SKIP_CHECK_TRAP_ID:

if SINGLE_STEP_MISSED_WORKAROUND
	// Prioritize single step exception over context save.
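
The hunks above pin down a strict triage order: every exception the driver cannot mask (memory violation, illegal instruction, debugger events, a non-host trap ID) is routed to the second-level handler before the one driver-maskable exception, the host trap, is even considered, so masking host traps can never swallow a simultaneous hard fault. A minimal C model of that ordering follows; every constant and name in it is an assumed stand-in for illustration, and the authoritative logic remains the GCN assembly in this patch.

/* sketch.c -- hedged model of the first-level trap triage; compile with
 * any C99 compiler. Constants are hypothetical, not taken from the source. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PC_HI_HT           (1u << 24)  /* host-trap flag in PC_HI (assumed) */
#define PC_HI_TRAP_ID      0x00ff0000u /* trap-id field in PC_HI (assumed) */
#define NON_MASKABLE_EXCS  0x03800900u /* mem viol, illegal inst, dbg events (assumed) */
#define HOST_TRAPS_ALLOWED (1u << 24)  /* driver consent flag loaded from TMA+0x10 */

enum action { EXIT_TRAP = 0, SECOND_LEVEL = 1 };

static enum action dispatch(uint32_t pc_hi, uint32_t trapsts, uint32_t tma_flags)
{
	/* Step 1: anything the driver cannot mask always reaches the second
	 * level (trap_id is only trusted here when the HT bit is clear,
	 * mirroring L_CHECK_TRAP_ID above). */
	bool non_maskable = (trapsts & NON_MASKABLE_EXCS) ||
			    (!(pc_hi & PC_HI_HT) && (pc_hi & PC_HI_TRAP_ID));
	if (non_maskable)
		return SECOND_LEVEL;

	/* Step 2: host traps are triaged last because the driver may mask
	 * them via the flag it publishes next to the second-level TMA. */
	if ((pc_hi & PC_HI_HT) && (tma_flags & HOST_TRAPS_ALLOWED))
		return SECOND_LEVEL;

	return EXIT_TRAP;	/* masked host trap (or nothing): resume the wave */
}

int main(void)
{
	printf("%d\n", dispatch(PC_HI_HT, 0, 0));                  /* 0: masked, resume */
	printf("%d\n", dispatch(PC_HI_HT, 0x100, 0));              /* 1: mem viol wins */
	printf("%d\n", dispatch(PC_HI_HT, 0, HOST_TRAPS_ALLOWED)); /* 1: driver consented */
	return 0;
}

In this model, dispatch(PC_HI_HT, 0x100, 0) still returns SECOND_LEVEL even though host traps are masked; that surviving path is what S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP records in the real handler.
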
@@ -269,16 +290,22 @@ if SINGLE_STEP_MISSED_WORKAROUND
	s_cbranch_scc1	L_FETCH_2ND_TRAP
end

+	// Check TTMP1 bit 24 (HT) == 1
+	s_bitcmp1_b32	s_save_pc_hi, S_SAVE_PC_HI_HT_SHIFT
+	s_cbranch_scc1	L_FETCH_2ND_TRAP_DRIVER_MASKABLE
+
	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
	s_cbranch_scc1	L_SAVE

L_FETCH_2ND_TRAP:
+	s_bitset1_b32	s_save_pc_hi, S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP
+L_FETCH_2ND_TRAP_DRIVER_MASKABLE:
	// Preserve and clear scalar XNACK state before issuing scalar reads.
	save_and_clear_ib_sts(ttmp14)

	// Read second-level TBA/TMA from first-level TMA and jump if available.
-	// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
-	// ttmp12 holds SQ_WAVE_STATUS
+	// ttmp[2:5] and s_tma_flags can be used (others hold SPI-initialized debug
+	// data); ttmp12 holds SQ_WAVE_STATUS
	s_getreg_b32	ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
	s_getreg_b32	ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
	s_lshl_b64	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

@@ -287,18 +314,42 @@ L_FETCH_2ND_TRAP:
	s_cbranch_scc0	L_NO_SIGN_EXTEND_TMA
	s_or_b32	ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
-
-	s_load_dword	ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
-	s_waitcnt	lgkmcnt(0)
-	s_lshl_b32	ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
-	s_andn2_b32	s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK
-	s_or_b32	s_save_ib_sts, s_save_ib_sts, ttmp2
-
+	s_load_dword	s_tma_flags, [ttmp14, ttmp15], 0x10 glc:1 // Load the debug enables and host trap enabled flags
	s_load_dwordx2	[ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
-	s_waitcnt	lgkmcnt(0)
	s_load_dwordx2	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
	s_waitcnt	lgkmcnt(0)
+	// Put debug enable bit and host trap bit into SAVE_IB_STS register, bits
+	// 23 and 24, respectively.
+	s_lshl_b32	s_tma_flags, s_tma_flags, TTMP_FEATURES_ENABLED_FLAGS_SHIFT
+	s_andn2_b32	s_save_ib_sts, s_save_ib_sts, TTMP_FEATURES_ENABLED_FLAGS_MASK
+	s_or_b32	s_save_ib_sts, s_save_ib_sts, s_tma_flags
+
+	// If not a host trap, then the driver cannot mask this. Go to the 2nd-level
+	// trap handler now.
+	s_bitcmp1_b32	s_save_pc_hi, S_SAVE_PC_HI_HT_SHIFT
+	s_cbranch_scc0	L_GOTO_2ND_TRAP
+
+	// If the driver said host traps are OK, go to the 2nd-level handler now.
+	s_bitcmp1_b32	s_save_ib_sts, TTMP_HOST_TRAP_ENABLED_SHIFT
+	s_cbranch_scc1	L_GOTO_2ND_TRAP
+
+	// The driver said host traps are masked; zero out host trap and trap ID.
+	s_andn2_b32	s_save_pc_hi, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+	s_setreg_imm32_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT, 1), 0x0
+
+	// If there was another trap besides this masked host trap, go handle it in
+	// the 2nd-level handler.
+	s_bitcmp1_b32	s_save_pc_hi, S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP
+	s_bitset0_b32	s_save_pc_hi, S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP // zero this out
+	s_cbranch_scc0	L_EXIT_TRAP // Otherwise, exit the trap handler
+
+L_GOTO_2ND_TRAP:
+	// Reset bits used temporarily by 1st level trap handler so they do not
+	// leak to the 2nd level trap handler.
+ s_bitset0_b32 s_save_ib_sts, TTMP_HOST_TRAP_ENABLED_SHIFT + s_bitset0_b32 s_save_pc_hi, S_SAVE_PC_HI_NON_DRIVER_MASKABLE_TRAP + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 00350eccd5714..0f8bde24dc05c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -36,11 +36,14 @@ #include #include #include -#include #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_svm.h" +#include "kfd_ipc.h" +#include "kfd_trace.h" +#include "kfd_pc_sampling.h" + #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" #include "amdgpu_dma_buf.h" @@ -64,7 +67,7 @@ static const struct file_operations kfd_fops = { static int kfd_char_dev_major = -1; struct device *kfd_device; -static const struct class kfd_class = { +static struct class kfd_class = { .name = kfd_dev_name, }; @@ -202,7 +205,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, } if ((args->ring_base_address) && - (!access_ok((const void __user *) args->ring_base_address, + (!kcl_access_ok((const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("Can't access ring base address\n"); return -EFAULT; @@ -213,27 +216,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return -EINVAL; } - if (!access_ok((const void __user *) args->read_pointer_address, + if (!kcl_access_ok((const void __user *) args->read_pointer_address, sizeof(uint32_t))) { pr_err("Can't access read pointer\n"); return -EFAULT; } - if (!access_ok((const void __user *) args->write_pointer_address, + if (!kcl_access_ok((const void __user *) args->write_pointer_address, sizeof(uint32_t))) { pr_err("Can't access write pointer\n"); return -EFAULT; } if (args->eop_buffer_address && - !access_ok((const void __user *) args->eop_buffer_address, + !kcl_access_ok((const void __user *) args->eop_buffer_address, sizeof(uint32_t))) { pr_debug("Can't access eop buffer"); return -EFAULT; } if (args->ctx_save_restore_address && - !access_ok((const void __user *) args->ctx_save_restore_address, + !kcl_access_ok((const void __user *) args->ctx_save_restore_address, sizeof(uint32_t))) { pr_debug("Can't access ctx save restore buffer"); return -EFAULT; @@ -366,7 +369,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, + err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id, NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -451,7 +454,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, } if ((args->ring_base_address) && - (!access_ok((const void __user *) args->ring_base_address, + (!kcl_access_ok((const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("Can't access ring base address\n"); return -EFAULT; @@ -1049,6 +1052,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, long err; uint64_t offset = args->mmap_offset; uint32_t flags = args->flags; + struct vm_area_struct *vma; + uint64_t cpuva = 0; + unsigned int mem_type = 0; if (args->size == 0) return -EINVAL; @@ -1107,7 +1113,38 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } - 
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+		/* Check if the userptr corresponds to another (or third-party)
+		 * device local memory. If so, treat it as a doorbell. User
+		 * space will be oblivious to this and will use this doorbell
+		 * BO as a regular userptr BO.
+		 */
+		mmap_read_lock(current->mm);
+		vma = find_vma(current->mm, args->mmap_offset);
+		if (vma && args->mmap_offset >= vma->vm_start &&
+		    (vma->vm_flags & VM_IO)) {
+			unsigned long pfn;
+
+			err = follow_pfn(vma, args->mmap_offset, &pfn);
+			mmap_read_unlock(current->mm);
+			if (err) {
+				pr_debug("Failed to get PFN: %ld\n", err);
+				goto err_unlock;
+			}
+			flags |= KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL;
+			flags &= ~KFD_IOC_ALLOC_MEM_FLAGS_USERPTR;
+			offset = (pfn << PAGE_SHIFT);
+		} else {
+			mmap_read_unlock(current->mm);
+			if (offset & (PAGE_SIZE - 1)) {
+				pr_debug("Unaligned userptr address:%llx\n",
+					 offset);
+				err = -EINVAL;
+				goto err_unlock;
+			}
+			cpuva = offset;
+		}
+	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
			err = -EINVAL;
			goto err_unlock;

@@ -1137,7 +1174,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
	if (err)
		goto err_unlock;

-	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	mem_type = flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
+			    KFD_IOC_ALLOC_MEM_FLAGS_GTT |
+			    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
+			    KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+			    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem,
+			args->va_addr, args->size, cpuva, mem_type, -1);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;

@@ -1149,7 +1192,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
			size >>= 1;

-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
	}

	mutex_unlock(&p->mutex);

@@ -1181,7 +1224,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
-	void *mem;
+	struct kfd_bo *buf_obj;
	int ret;
	uint64_t size = 0;

@@ -1203,15 +1246,15 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
		goto err_pdd;
	}

-	mem = kfd_process_device_translate_handle(
-		pdd, GET_IDR_HANDLE(args->handle));
-	if (!mem) {
+	buf_obj = kfd_process_device_find_bo(pdd,
+			GET_IDR_HANDLE(args->handle));
+	if (!buf_obj) {
		ret = -EINVAL;
-		goto err_unlock;
+		goto err_pdd;
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
-		(struct kgd_mem *)mem, pdd->drm_priv, &size);
+		buf_obj->mem, pdd->drm_priv, &size);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
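
The USERPTR hunk above decides between the doorbell and userptr paths by probing the VMA backing the user address. Pulled out into a free-standing helper for readability, the probe looks roughly like the sketch below; the helper name and the -ENOENT convention are inventions of this sketch, since the patch open-codes the logic inside kfd_ioctl_alloc_memory_of_gpu():

#include <linux/errno.h>
#include <linux/mm.h>

/* Returns 0 and fills *pfn for a VM_IO mapping (caller switches to the
 * doorbell path), -ENOENT for ordinary memory (caller keeps the userptr
 * path), or a follow_pfn() error. */
static int kfd_probe_io_userptr(struct mm_struct *mm, unsigned long addr,
				unsigned long *pfn)
{
	struct vm_area_struct *vma;
	int ret = -ENOENT;

	mmap_read_lock(mm);
	vma = find_vma(mm, addr);
	/* find_vma() may return a VMA above addr; check containment too. */
	if (vma && addr >= vma->vm_start && (vma->vm_flags & VM_IO))
		ret = follow_pfn(vma, addr, pfn); /* needs mmap lock held */
	mmap_read_unlock(mm);

	return ret;
}

Note the ordering in the hunk itself: follow_pfn() runs while the mmap read lock is still held, and the DOORBELL/USERPTR flag flip happens only after unlocking; dropping the lock before resolving the PFN would race with a concurrent munmap() of the mapping.
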
@@ -1220,7 +1263,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, kfd_process_device_remove_obj_handle( pdd, GET_IDR_HANDLE(args->handle)); - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size); + atomic64_sub(size, &pdd->vram_usage); err_unlock: err_pdd: @@ -1239,6 +1282,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, int i; uint32_t *devices_arr = NULL; + trace_kfd_map_memory_to_gpu_start(p); if (!args->n_devices) { pr_debug("Device IDs array empty\n"); return -EINVAL; @@ -1310,6 +1354,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), ((struct kgd_mem *)mem)->domain); + goto map_memory_to_gpu_failed; } args->n_success = i+1; @@ -1332,6 +1377,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } kfree(devices_arr); + trace_kfd_map_memory_to_gpu_end(p, + args->n_devices * sizeof(*devices_arr), "Success"); return err; get_process_device_data_failed: @@ -1342,6 +1389,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, mutex_unlock(&p->mutex); copy_from_user_failed: kfree(devices_arr); + trace_kfd_map_memory_to_gpu_end(p, + args->n_devices * sizeof(*devices_arr), "Failed"); return err; } @@ -1434,7 +1483,6 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, mutex_unlock(&p->mutex); kfree(devices_arr); - return 0; bind_process_to_device_failed: @@ -1575,7 +1623,8 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, if (r) goto err_unlock; - idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + idr_handle = kfd_process_device_create_obj_handle(pdd, mem, + args->va_addr, size, 0, 0, -1); if (idr_handle < 0) { r = -EFAULT; goto err_free; @@ -1595,6 +1644,51 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, return r; } +static int kfd_ioctl_ipc_export_handle(struct file *filep, + struct kfd_process *p, + void *data) +{ + struct kfd_ioctl_ipc_export_handle_args *args = data; + struct kfd_process_device *pdd; + int r; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + mutex_unlock(&p->mutex); + if (!pdd) + return -EINVAL; + + r = kfd_ipc_export_as_handle(pdd->dev, p, args->handle, args->share_handle, + args->flags); + if (r) + pr_err("Failed to export IPC handle\n"); + + return r; +} + +static int kfd_ioctl_ipc_import_handle(struct file *filep, + struct kfd_process *p, + void *data) +{ + struct kfd_ioctl_ipc_import_handle_args *args = data; + struct kfd_process_device *pdd; + int r; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + mutex_unlock(&p->mutex); + if (!pdd) + return -EINVAL; + + r = kfd_ipc_import_handle(pdd->dev, p, args->gpu_id, args->share_handle, + args->va_addr, &args->handle, + &args->mmap_offset, &args->flags, false); + if (r) + pr_err("Failed to import IPC handle\n"); + + return r; +} + static int kfd_ioctl_export_dmabuf(struct file *filep, struct kfd_process *p, void *data) { @@ -1647,6 +1741,21 @@ static int kfd_ioctl_export_dmabuf(struct file *filep, return ret; } +/* Place holder for deprecated DBG API */ +static int kfd_ioctl_dbg_set_debug_trap_deprecated(struct file *filep, + struct kfd_process *p, void *data) +{ + dev_dbg(kfd_device, "AMDKFD_IOC_DBG_TRAP is deprecated.\n"); + return -EINVAL; +} + +/* Place holder for deprecated CMA API */ +static int kfd_ioctl_cross_memory_copy_deprecated(struct file *filep, + struct kfd_process *local_p, void *data) { + dev_dbg(kfd_device, "AMDKFD_IOC_CROSS_MEMORY_COPY is deprecated.\n"); + return -EINVAL; +} + /* Handle 
requests for watching SMI events */ static int kfd_ioctl_smi_events(struct file *filep, struct kfd_process *p, void *data) @@ -1729,6 +1838,45 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) } #endif +static int kfd_ioctl_rlc_spm(struct file *filep, + struct kfd_process *p, void *data) +{ + return kfd_rlc_spm(p, data); +} + +static int kfd_ioctl_pc_sample(struct file *filep, + struct kfd_process *p, void __user *data) +{ + struct kfd_ioctl_pc_sample_args *args = data; + struct kfd_process_device *pdd; + int ret = 0; + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("PC Sampling does not support sched_policy %i", sched_policy); + return -EINVAL; + } + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + + if (!pdd) { + pr_debug("could not find gpu id 0x%x.", args->gpu_id); + ret = -EINVAL; + } else if (args->op == KFD_IOCTL_PCS_OP_START) { + pdd = kfd_bind_process_to_device(pdd->dev, p); + if (IS_ERR(pdd)) { + pr_debug("failed to bind process %p with gpu id 0x%x", p, args->gpu_id); + ret = -ESRCH; + } + } + + if (!ret) + ret = kfd_pc_sample(pdd, args); + mutex_unlock(&p->mutex); + + return ret; +} + static int criu_checkpoint_process(struct kfd_process *p, uint8_t __user *user_priv_data, uint64_t *priv_offset) @@ -1821,11 +1969,11 @@ static uint32_t get_process_num_bos(struct kfd_process *p) /* Run over all PDDs of the process */ for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - void *mem; + struct kfd_bo *buf_obj; int id; - idr_for_each_entry(&pdd->alloc_idr, mem, id) { - struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; + idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) { + struct kgd_mem *kgd_mem = (struct kgd_mem *)buf_obj->mem; if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base) num_of_bos++; @@ -1835,7 +1983,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) } static int criu_get_prime_handle(struct kgd_mem *mem, - int flags, u32 *shared_fd) + int flags, u32 *shared_fd, + struct file **file) { struct dma_buf *dmabuf; int ret; @@ -1846,13 +1995,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem, return ret; } - ret = dma_buf_fd(dmabuf, flags); + ret = get_unused_fd_flags(flags); if (ret < 0) { pr_err("dmabuf create fd failed, ret:%d\n", ret); goto out_free_dmabuf; } *shared_fd = ret; + *file = dmabuf->file; return 0; out_free_dmabuf: @@ -1860,6 +2010,25 @@ static int criu_get_prime_handle(struct kgd_mem *mem, return ret; } +static void commit_files(struct file **files, + struct kfd_criu_bo_bucket *bo_buckets, + unsigned int count, + int err) +{ + while (count--) { + struct file *file = files[count]; + + if (!file) + continue; + if (err) { + fput(file); + put_unused_fd(bo_buckets[count].dmabuf_fd); + } else { + fd_install(bo_buckets[count].dmabuf_fd, file); + } + } +} + static int criu_checkpoint_bos(struct kfd_process *p, uint32_t num_bos, uint8_t __user *user_bos, @@ -1868,8 +2037,9 @@ static int criu_checkpoint_bos(struct kfd_process *p, { struct kfd_criu_bo_bucket *bo_buckets; struct kfd_criu_bo_priv_data *bo_privs; + struct file **files = NULL; int ret = 0, pdd_index, bo_index = 0, id; - void *mem; + struct kfd_bo *buf_obj; bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL); if (!bo_buckets) @@ -1881,17 +2051,23 @@ static int criu_checkpoint_bos(struct kfd_process *p, goto exit; } + files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL); + if (!files) { + ret = -ENOMEM; + goto exit; + } + for (pdd_index = 0; pdd_index < p->n_pdds; 
pdd_index++) { struct kfd_process_device *pdd = p->pdds[pdd_index]; struct amdgpu_bo *dumper_bo; struct kgd_mem *kgd_mem; - idr_for_each_entry(&pdd->alloc_idr, mem, id) { + idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) { struct kfd_criu_bo_bucket *bo_bucket; struct kfd_criu_bo_priv_data *bo_priv; int i, dev_idx = 0; - kgd_mem = (struct kgd_mem *)mem; + kgd_mem = (struct kgd_mem *)buf_obj->mem; dumper_bo = kgd_mem->bo; /* Skip checkpointing BOs that are used for Trap handler @@ -1923,7 +2099,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, ret = criu_get_prime_handle(kgd_mem, bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, - &bo_bucket->dmabuf_fd); + &bo_bucket->dmabuf_fd, &files[bo_index]); if (ret) goto exit; } else { @@ -1945,8 +2121,18 @@ static int criu_checkpoint_bos(struct kfd_process *p, bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; } - pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n" + if (kgd_mem->ipc_obj) { + bo_priv->ipc_flags = kgd_mem->ipc_obj->flags; + bo_priv->is_imported = kgd_mem->is_imported; + + memcpy(bo_priv->ipc_share_handle, + kgd_mem->ipc_obj->share_handle, + sizeof(kgd_mem->ipc_obj->share_handle)); + } + + pr_debug("[%d]bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx" "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x", + bo_index, bo_bucket->size, bo_bucket->addr, bo_bucket->offset, @@ -1974,12 +2160,8 @@ static int criu_checkpoint_bos(struct kfd_process *p, *priv_offset += num_bos * sizeof(*bo_privs); exit: - while (ret && bo_index--) { - if (bo_buckets[bo_index].alloc_flags - & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) - close_fd(bo_buckets[bo_index].dmabuf_fd); - } - + commit_files(files, bo_buckets, bo_index, ret); + kvfree(files); kvfree(bo_buckets); kvfree(bo_privs); return ret; @@ -2260,6 +2442,93 @@ static int criu_restore_devices(struct kfd_process *p, return ret; } +static int criu_restore_memory_of_gpu_ipc(struct kfd_process_device *pdd, + struct kfd_criu_bo_bucket *bo_bucket, + struct kfd_criu_bo_priv_data *bo_priv, + struct kgd_mem **kgd_mem) +{ + uint64_t alloc_handle = MAKE_HANDLE(pdd->user_gpu_id, bo_priv->idr_handle); + struct kfd_node *dev = pdd->dev; + struct kfd_bo *kfd_bo; + int ret, idr_handle; + uint64_t offset; + + ret = kfd_ipc_import_handle(dev, pdd->process, pdd->user_gpu_id, bo_priv->ipc_share_handle, + bo_bucket->addr, &alloc_handle, &offset, NULL, true); + if (ret) { + unsigned int mem_type; + + if (ret != -EINVAL) { + pr_err("Failed to import IPC handle ret:%d\n", ret); + return ret; + } + + /* kfd_ipc_import_handle returns -EINVAL if the ipc share_handle does not exist. + * In that case create a new BO and create a new ipc share_handle by calling + * amdgpu_amdkfd_gpuvm_export_ipc_obj. 
+ */ + ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev, bo_bucket->addr, + bo_bucket->size, pdd->drm_priv, + kgd_mem, &offset, + bo_bucket->alloc_flags, true); + if (ret) { + pr_err("Could not create the BO\n"); + return ret; + } + + pr_debug("New IPC BO created: size:0x%llx addr:0x%llx offset:0x%llx\n", + bo_bucket->size, bo_bucket->addr, offset); + + mem_type = bo_bucket->alloc_flags & + (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT); + + idr_handle = kfd_process_device_create_obj_handle(pdd, *kgd_mem, bo_bucket->addr, + bo_bucket->size, 0, mem_type, + bo_priv->idr_handle); + if (idr_handle < 0) { + pr_err("Could not allocate idr\n"); + + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, *kgd_mem, pdd->drm_priv, + NULL); + return -ENOMEM; + } + + ret = amdgpu_amdkfd_gpuvm_export_ipc_obj(dev->adev, pdd->drm_priv, *kgd_mem, + &(*kgd_mem)->ipc_obj, bo_priv->ipc_flags, + bo_priv->ipc_share_handle); + if (ret == -EINVAL) { + /* This is a race condition. The other process that owns this same IPC + * handle created the handle before this process. Delete BO and re-use + * import IPC handle created by the other process. + */ + ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, *kgd_mem, + pdd->drm_priv, NULL); + if (ret) + return ret; + + kfd_process_device_remove_obj_handle(pdd, idr_handle); + + ret = kfd_ipc_import_handle(dev, pdd->process, pdd->user_gpu_id, + bo_priv->ipc_share_handle, + bo_bucket->addr, &alloc_handle, + &offset, NULL, true); + if (ret) + return ret; + } + } + + kfd_bo = kfd_process_device_find_bo(pdd, bo_priv->idr_handle); + *kgd_mem = kfd_bo->mem; + (*kgd_mem)->is_imported = bo_priv->is_imported; + + bo_bucket->restored_offset = offset; + if ((bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && !bo_priv->is_imported) + /* Update the VRAM usage count */ + atomic64_add(bo_bucket->size, &pdd->vram_usage); + + return 0; +} + static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, struct kfd_criu_bo_bucket *bo_bucket, struct kfd_criu_bo_priv_data *bo_priv, @@ -2268,6 +2537,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, int idr_handle; int ret; const bool criu_resume = true; + unsigned int mem_type = 0; u64 offset; if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { @@ -2304,9 +2574,17 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, bo_bucket->size, bo_bucket->addr, offset); /* Restore previous IDR handle */ - pr_debug("Restoring old IDR handle for the BO"); - idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle, - bo_priv->idr_handle + 1, GFP_KERNEL); + mem_type = bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | + KFD_IOC_ALLOC_MEM_FLAGS_GTT | + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | + KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); + + idr_handle = kfd_process_device_create_obj_handle(pdd, *kgd_mem, + bo_bucket->addr, + bo_bucket->size, + 0, mem_type, + bo_priv->idr_handle); if (idr_handle < 0) { pr_err("Could not allocate idr\n"); @@ -2324,20 +2602,24 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { bo_bucket->restored_offset = offset; /* Update the VRAM usage count */ - WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size); + atomic64_add(bo_bucket->size, &pdd->vram_usage); } return 0; } static int criu_restore_bo(struct kfd_process *p, struct kfd_criu_bo_bucket *bo_bucket, - struct kfd_criu_bo_priv_data *bo_priv) + 
struct kfd_criu_bo_priv_data *bo_priv, + struct file **file) { + const uint32_t zero_handle[4] = { 0, 0, 0, 0 }; struct kfd_process_device *pdd; struct kgd_mem *kgd_mem; int ret; int j; + BUILD_BUG_ON(sizeof_field(struct kfd_ipc_obj, share_handle) != sizeof(zero_handle)); + pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n", bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags, bo_priv->idr_handle); @@ -2348,7 +2630,11 @@ static int criu_restore_bo(struct kfd_process *p, return -ENODEV; } - ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem); + if (memcmp(bo_priv->ipc_share_handle, zero_handle, sizeof(zero_handle))) + ret = criu_restore_memory_of_gpu_ipc(pdd, bo_bucket, bo_priv, &kgd_mem); + else + ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem); + if (ret) return ret; @@ -2383,7 +2669,7 @@ static int criu_restore_bo(struct kfd_process *p, if (bo_bucket->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, - &bo_bucket->dmabuf_fd); + &bo_bucket->dmabuf_fd, file); if (ret) return ret; } else { @@ -2400,6 +2686,7 @@ static int criu_restore_bos(struct kfd_process *p, { struct kfd_criu_bo_bucket *bo_buckets = NULL; struct kfd_criu_bo_priv_data *bo_privs = NULL; + struct file **files = NULL; int ret = 0; uint32_t i = 0; @@ -2413,6 +2700,12 @@ static int criu_restore_bos(struct kfd_process *p, if (!bo_buckets) return -ENOMEM; + files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL); + if (!files) { + ret = -ENOMEM; + goto exit; + } + ret = copy_from_user(bo_buckets, (void __user *)args->bos, args->num_bos * sizeof(*bo_buckets)); if (ret) { @@ -2438,7 +2731,7 @@ static int criu_restore_bos(struct kfd_process *p, /* Create and map new BOs */ for (; i < args->num_bos; i++) { - ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]); + ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]); if (ret) { pr_debug("Failed to restore BO[%d] ret%d\n", i, ret); goto exit; @@ -2453,11 +2746,8 @@ static int criu_restore_bos(struct kfd_process *p, ret = -EFAULT; exit: - while (ret && i--) { - if (bo_buckets[i].alloc_flags - & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) - close_fd(bo_buckets[i].dmabuf_fd); - } + commit_files(files, bo_buckets, i, ret); + kvfree(files); kvfree(bo_buckets); kvfree(bo_privs); return ret; @@ -2539,7 +2829,7 @@ static int criu_restore(struct file *filep, * Set the process to evicted state to avoid running any new queues before all the memory * mappings are ready. 
*/ - ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); + ret = kfd_process_evict_queues(p, false, KFD_QUEUE_EVICTION_CRIU_RESTORE); if (ret) goto exit_unlock; @@ -2658,7 +2948,7 @@ static int criu_process_info(struct file *filep, goto err_unlock; } - ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); + ret = kfd_process_evict_queues(p, false, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); if (ret) goto err_unlock; @@ -2749,26 +3039,9 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug, p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; p->runtime_info.r_debug = r_debug; - p->runtime_info.ttmp_setup = enable_ttmp_setup; - if (p->runtime_info.ttmp_setup) { - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { - amdgpu_gfx_off_ctrl(pdd->dev->adev, false); - pdd->dev->kfd2kgd->enable_debug_trap( - pdd->dev->adev, - true, - pdd->dev->vm_info.last_vmid_kfd); - } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { - pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap( - pdd->dev->adev, - false, - 0); - } - } - } + if (enable_ttmp_setup) + kfd_dbg_enable_ttmp_setup(p); retry: if (p->debug_trap_enabled) { @@ -2918,10 +3191,10 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v goto out; } - /* Check if target is still PTRACED. */ rcu_read_lock(); + /* Check if target is still PTRACED. */ if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE - && ptrace_parent(target->lead_thread) != current) { + && ptrace_parent(target->lead_thread) != current) { pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid); r = -EPERM; } @@ -2931,6 +3204,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v goto out; mutex_lock(&target->mutex); + if (!!target->pc_sampling_ref) { + pr_debug("Cannot enable debug trap on PID:%d because PC Sampling active\n", args->pid); + r = -EBUSY; + goto unlock_out; + } if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) { pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op); @@ -3199,9 +3477,26 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, kfd_ioctl_set_debug_trap, 0), -}; -#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_IMPORT_HANDLE, + kfd_ioctl_ipc_import_handle, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_EXPORT_HANDLE, + kfd_ioctl_ipc_export_handle, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP_DEPRECATED, + kfd_ioctl_dbg_set_debug_trap_deprecated, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RLC_SPM, + kfd_ioctl_rlc_spm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CROSS_MEMORY_COPY_DEPRECATED, + kfd_ioctl_cross_memory_copy_deprecated, 0), + + /* TODO: KFD_IOC_FLAG_PERFMON is not required for host-trap, disable first */ + AMDKFD_IOCTL_DEF(AMDKFD_IOC_PC_SAMPLE, + kfd_ioctl_pc_sample, 0), +}; static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { @@ -3215,10 +3510,8 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) int retcode = -EINVAL; bool ptrace_attached = false; - if (nr >= AMDKFD_CORE_IOCTL_COUNT) - goto err_i1; - - if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + if (((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) || + ((nr >= AMDKFD_COMMAND_START_2) && (nr < AMDKFD_COMMAND_END_2))) { u32 amdkfd_size; ioctl = &amdkfd_ioctls[nr]; @@ -3275,6 +3568,14 @@ static 
long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) } } + /* PC Sampling Monitor */ + if (unlikely(ioctl->flags & KFD_IOC_FLAG_PERFMON)) { + if (!capable(CAP_PERFMON) && !capable(CAP_SYS_ADMIN)) { + retcode = -EACCES; + goto err_i1; + } + } + if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { kdata = stack_kdata; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd7b81b7b939a..7b2c408db75cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; i++; } /* GL1 Data Cache per SA */ @@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; i++; } /* L3 Data Cache per GPU */ @@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } return i; @@ -1840,8 +1846,6 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) { struct crat_header *crat_table = (struct crat_header *)pcrat_image; - struct acpi_table_header *acpi_table; - acpi_status status; struct crat_subtype_generic *sub_type_hdr; int avail_size = *size; int numa_node_id; @@ -1849,6 +1853,10 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) uint32_t entries = 0; #endif int ret = 0; +#ifdef CONFIG_ACPI + struct acpi_table_header *acpi_table; + acpi_status status; +#endif if (!pcrat_image) return -EINVAL; @@ -1865,6 +1873,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sizeof(crat_table->signature)); crat_table->length = sizeof(struct crat_header); +#ifdef CONFIG_ACPI status = acpi_get_table("DSDT", 0, &acpi_table); if 
(status != AE_OK) pr_warn("DSDT table not found for OEM information\n"); @@ -1876,6 +1885,11 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) CRAT_OEMTABLEID_LENGTH); acpi_put_table(acpi_table); } +#else + crat_table->oem_revision = 0; + memcpy(crat_table->oem_id, "INV", CRAT_OEMID_LENGTH); + memcpy(crat_table->oem_table_id, "UNAVAIL", CRAT_OEMTABLEID_LENGTH); +#endif crat_table->total_entries = 0; crat_table->num_domains = 0; @@ -2027,6 +2041,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) if (pxm > max_pxm) max_pxm = pxm; break; +#ifdef HAVE_ACPI_SRAT_GENERIC_AFFINITY case ACPI_SRAT_TYPE_GENERIC_AFFINITY: gpu = (struct acpi_srat_generic_affinity *)sub_header; bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | @@ -2036,6 +2051,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) numa_node = pxm_to_node(gpu->proximity_domain); } break; +#endif default: break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 312dfa84f29f8..dfb78d135e497 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -1133,3 +1133,29 @@ void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, mutex_unlock(&target->event_mutex); } + +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p) +{ + int i; + + if (p->runtime_info.ttmp_setup) + return; + + p->runtime_info.ttmp_setup = true; + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { + amdgpu_gfx_off_ctrl(pdd->dev->adev, false); + pdd->dev->kfd2kgd->enable_debug_trap( + pdd->dev->adev, + true, + pdd->dev->vm_info.last_vmid_kfd); + } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { + pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap( + pdd->dev->adev, + false, + 0); + } + } +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 924d0fd85dfb8..395fb3e1feb57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -91,6 +91,9 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target, void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, uint64_t exception_set_mask); + +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p); + /* * If GFX off is enabled, chips that do not support RLC restore for the debug * registers will disable GFX off temporarily for the entire debug session. 
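runtime_enable() (in the chardev hunk earlier) now delegates TTMP enablement to this kfd_dbg_enable_ttmp_setup() helper, which returns early when setup is already active. A minimal stand-alone model of that control flow is sketched below; the struct, flag values and names are illustrative stand-ins, not the driver's types:

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the driver's per-device state. */
struct fake_device {
	const char *name;
	bool rlc_restore_supported;	/* kfd_dbg_is_rlc_restore_supported() */
	bool per_vmid_supported;	/* kfd_dbg_is_per_vmid_supported() */
	bool gfx_off_allowed;		/* amdgpu_gfx_off_ctrl() state */
	int spi_dbg_override;
};

/* Mirrors the shape of kfd_dbg_enable_ttmp_setup(): an idempotent
 * enable that walks every device bound to the process. */
static void enable_ttmp_setup(struct fake_device *devs, int n, bool *ttmp_setup)
{
	int i;

	if (*ttmp_setup)
		return;		/* already enabled: repeated calls are no-ops */
	*ttmp_setup = true;

	for (i = 0; i < n; i++) {
		if (!devs[i].rlc_restore_supported) {
			/* Debug state would be lost across a GFXOFF cycle,
			 * so keep the block powered and trap on the last
			 * KFD VMID, as the helper above does. */
			devs[i].gfx_off_allowed = false;
			printf("%s: gfxoff held off, trap enabled\n", devs[i].name);
		} else if (devs[i].per_vmid_supported) {
			devs[i].spi_dbg_override = 1;	/* stand-in return value */
			printf("%s: per-VMID SPI debug override recorded\n", devs[i].name);
		}
	}
}

int main(void)
{
	struct fake_device devs[] = {
		{ "gfx-no-rlc-restore", false, false, true, 0 },
		{ "gfx-per-vmid", true, true, true, 0 },
	};
	bool ttmp_setup = false;

	enable_ttmp_setup(devs, 2, &ttmp_setup);
	enable_ttmp_setup(devs, 2, &ttmp_setup);	/* second call returns early */
	return 0;
}
```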
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c2d2598f776cd..b6c5ffd4630be 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" @@ -572,6 +573,19 @@ static int kfd_gws_init(struct kfd_node *node) ret = amdgpu_amdkfd_alloc_gws(node->adev, node->adev->gds.gws_size, &node->gws); + if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) + && kfd->mec2_fw_version < 0x81b6) || + (KFD_GC_VERSION(kfd) >= IP_VERSION(9, 1, 0) + && KFD_GC_VERSION(kfd) <= IP_VERSION(9, 2, 2) + && kfd->mec2_fw_version < 0x1b6) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 0) + && kfd->mec2_fw_version < 0x1b6) || + (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) + && kfd->mec2_fw_version < 0x30) || + (KFD_GC_VERSION(kfd) >= IP_VERSION(11, 0, 0) && + KFD_GC_VERSION(kfd) < IP_VERSION(12, 0, 0))) + node->gws_debug_workaround = true; + return ret; } @@ -581,6 +595,18 @@ static void kfd_smi_init(struct kfd_node *dev) spin_lock_init(&dev->smi_lock); } +static void kfd_pc_sampling_init(struct kfd_node *dev) +{ + mutex_init(&dev->pcs_data.mutex); + idr_init_base(&dev->pcs_data.hosttrap_entry.base.pc_sampling_idr, 1); +} + +static void kfd_pc_sampling_exit(struct kfd_node *dev) +{ + idr_destroy(&dev->pcs_data.hosttrap_entry.base.pc_sampling_idr); + mutex_destroy(&dev->pcs_data.mutex); +} + static int kfd_init_node(struct kfd_node *node) { int err = -1; @@ -611,6 +637,7 @@ static int kfd_init_node(struct kfd_node *node) } kfd_smi_init(node); + kfd_pc_sampling_init(node); return 0; @@ -641,6 +668,7 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) kfd_topology_remove_device(knode); if (knode->gws) amdgpu_amdkfd_free_gws(knode->adev, knode->gws); + kfd_pc_sampling_exit(knode); kfree(knode); kfd->nodes[i] = NULL; } @@ -778,7 +806,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (amdgpu_amdkfd_alloc_gtt_mem( kfd->adev, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, - false)) { + false, true)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); goto alloc_gtt_mem_failure; } @@ -946,7 +974,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd, kfd_smi_event_update_gpu_reset(node, false, reset_context); } - kgd2kfd_suspend(kfd, false); + kgd2kfd_suspend(kfd, false, true); for (i = 0; i < kfd->num_nodes; i++) kfd_signal_reset_event(kfd->nodes[i]); @@ -994,7 +1022,7 @@ bool kfd_is_locked(void) return (kfd_locked > 0); } -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm, bool force) { struct kfd_node *node; int i; @@ -1007,7 +1035,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) mutex_lock(&kfd_processes_mutex); /* For first KFD device suspend all the KFD processes */ if (++kfd_locked == 1) - kfd_suspend_all_processes(); + kfd_suspend_all_processes(force); mutex_unlock(&kfd_processes_mutex); } @@ -1059,13 +1087,24 @@ static inline void kfd_queue_work(struct workqueue_struct *wq, struct work_struct *work) { int cpu, new_cpu; + const struct cpumask *mask = NULL; cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) + +#if defined(CONFIG_SCHED_SMT) + /* CPU threads in the same core */ + mask = cpu_smt_mask(cpu); +#endif + if (!mask || cpumask_weight(mask) <= 1) + /* CPU threads in 
the same NUMA node */ + mask = cpu_cpu_mask(cpu); + /* Pick the next online CPU thread in the same core or NUMA node */ + for_each_cpu_wrap(cpu, mask, cpu+1) { + if (cpu != new_cpu && cpu_online(cpu)) { + new_cpu = cpu; break; - } while (cpu != new_cpu); + } + } queue_work_on(new_cpu, wq, work); } @@ -1118,7 +1157,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) return -ESRCH; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - r = kfd_process_evict_queues(p, trigger); + r = kfd_process_evict_queues(p, true, trigger); kfd_unref_process(p); return r; @@ -1392,6 +1431,13 @@ void kfd_dec_compute_active(struct kfd_node *node) WARN_ONCE(count < 0, "Compute profile ref. count error"); } +static bool kfd_compute_active(struct kfd_node *node) +{ + if (atomic_read(&node->kfd->compute_profile)) + return true; + return false; +} + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { /* @@ -1446,6 +1492,63 @@ void kgd2kfd_unlock_kfd(void) mutex_unlock(&kfd_processes_mutex); } +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + int ret; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + node = kfd->nodes[node_id]; + + ret = node->dqm->ops.unhalt(node->dqm); + if (ret) + dev_err(kfd_device, "Error in starting scheduler\n"); + + return ret; +} + +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + + node = kfd->nodes[node_id]; + return node->dqm->ops.halt(node->dqm); +} + +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return false; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return false; + } + + node = kfd->nodes[node_id]; + + return kfd_compute_active(node); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f6e2110702997..f7a0f53026548 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -151,6 +152,31 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases, xcc_id); } +bool check_if_queues_active(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + bool busy = false; + struct queue *q; + + dqm_lock(dqm); + list_for_each_entry(q, &qpd->queues_list, list) { + struct mqd_manager *mqd_mgr; + enum KFD_MQD_TYPE type; + + type = get_mqd_type_from_queue_type(q->properties.type); + mqd_mgr = dqm->mqd_mgrs[type]; + if (!mqd_mgr || !mqd_mgr->check_queue_active) + continue; + + busy = mqd_mgr->check_queue_active(q); + if (busy) + break; + } + dqm_unlock(dqm); + + return busy; +} + static void kfd_hws_hang(struct device_queue_manager *dqm) { struct device_process_node *cur; @@ -319,6 +345,46 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) return retval; } +static int 
suspend_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_suspend(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to suspend gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int resume_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_resume(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to resume gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -1667,6 +1733,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_cp_queue_count = 0; dqm->gws_queue_count = 0; dqm->active_runlist = false; + INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); dqm->trap_debug_vmid = 0; @@ -1679,6 +1746,60 @@ static int initialize_cpsch(struct device_queue_manager *dqm) return 0; } +/* halt_cpsch: + * Unmap queues so the scheduler doesn't continue processing remaining jobs + * in the queue. Then set dqm->sched_halt so queues don't map to the runlist + * until unhalt_cpsch is called. + */ +static int halt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); + + if (!dqm->is_hws_hang) { + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + else + ret = remove_all_queues_mes(dqm); + } + dqm->sched_halt = true; + dqm_unlock(dqm); + + return ret; +} + +/* unhalt_cpsch + * Unset dqm->sched_halt and map queues back to the runlist + */ +static int unhalt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running || !dqm->sched_halt) { + WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); + dqm_unlock(dqm); + return 0; + } + dqm->sched_halt = false; + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); + dqm_unlock(dqm); + + return ret; +} + static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; @@ -1954,7 +2075,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, { unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; struct device *dev = dqm->dev->adev->dev; - uint64_t *fence_addr = dqm->fence_addr; + volatile uint64_t *fence_addr = dqm->fence_addr; while (*fence_addr != fence_value) { /* Fatal err detected, this response won't come */ @@ -1984,7 +2105,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) struct device *dev = dqm->dev->adev->dev; int retval; - if (!dqm->sched_running) + if (!dqm->sched_running || dqm->sched_halt) return 0; if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) return 0; @@ -2154,12 +2275,12 @@ static int unmap_queues_cpsch(struct
device_queue_manager *dqm, if (retval) goto out; } - retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); if (retval) goto out; *dqm->fence_addr = KFD_FENCE_INIT; + mb(); pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ @@ -2313,10 +2434,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, pdd->sdma_past_activity_counter += sdma_val; } - list_del(&q->list); - qpd->queue_count--; if (q->properties.is_active) { decrement_queue_count(dqm, qpd, q); + q->properties.is_active = false; if (!dqm->dev->kfd->shared_resources.enable_mes) { retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, @@ -2327,6 +2447,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, retval = remove_queue_mes(dqm, q, qpd); } } + list_del(&q->list); + qpd->queue_count--; /* * Unconditionally decrement this counter, regardless of the queue's @@ -2687,7 +2809,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), - (void *)&(mem_obj->cpu_ptr), false); + (void *)&(mem_obj->cpu_ptr), false, true); return retval; } @@ -2727,6 +2849,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.halt = halt_cpsch; + dqm->ops.unhalt = unhalt_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; @@ -2835,6 +2959,95 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct device_queue_manager *dqm = knode->dqm; + struct device *dev = dqm->dev->adev->dev; + struct qcm_process_device *qpd; + struct queue *q = NULL; + int ret = 0; + + if (!p) + return -EINVAL; + + dqm_lock(dqm); + + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) { + qpd = &pdd->qpd; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->doorbell_id == doorbell_id && q->properties.is_active) { + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed\n"); + goto out; + } + + q->properties.is_evicted = true; + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + ret = remove_queue_mes(dqm, q, qpd); + if (ret) { + dev_err(dev, "Removing bad queue failed\n"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed\n"); + + break; + } + } + } + +out: + dqm_unlock(dqm); + return ret; +} + +static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device *dev = dqm->dev->adev->dev; + int ret = 0; + + /* Check if process is already evicted */ + dqm_lock(dqm); + if (qpd->evicted) { + /* Increment the evicted count to make sure the + * process stays evicted until it is terminated.
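 + * The count works like an eviction reference count: queues are only
 + * mapped back once every eviction request has been matched by a
 + * restore, so raising it here keeps the queues off the hardware.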
+ */ + qpd->evicted++; + dqm_unlock(dqm); + goto out; + } + dqm_unlock(dqm); + + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed\n"); + goto out; + } + + ret = dqm->ops.evict_process_queues(dqm, qpd); + if (ret) { + dev_err(dev, "Evicting process queues failed\n"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed\n"); + +out: + return ret; +} + int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; @@ -2845,8 +3058,13 @@ int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) return -EINVAL; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); - if (pdd) - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + if (pdd) { + if (dqm->dev->kfd->shared_resources.enable_mes) + ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); + else + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + } + kfd_unref_process(p); return ret; @@ -2982,7 +3200,7 @@ struct copy_context_work_handler_workarea { struct kfd_process *p; }; -static void copy_context_work_handler (struct work_struct *work) +static void copy_context_work_handler(struct work_struct *work) { struct copy_context_work_handler_workarea *workarea; struct mqd_manager *mqd_mgr; @@ -3009,6 +3227,9 @@ static void copy_context_work_handler (struct work_struct *work) struct qcm_process_device *qpd = &pdd->qpd; list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) + continue; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; /* We ignore the return value from get_wave_state @@ -3349,6 +3570,41 @@ int debug_refresh_runlist(struct device_queue_manager *dqm) return debug_map_and_unlock(dqm); } +void remap_queue(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, + uint32_t grace_period) +{ + dqm_lock(dqm); + if (!dqm->dev->kfd->shared_resources.enable_mes) + execute_queues_cpsch(dqm, filter, filter_param, grace_period); + dqm_unlock(dqm); +} + +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off, u32 *queue_format) +{ + struct queue *q; + bool r = false; + + if (!queue_format) + return r; + + dqm_lock(dqm); + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.doorbell_off == doorbell_off) { + *queue_format = q->properties.format; + r = true; + goto out; + } + } + +out: + dqm_unlock(dqm); + return r; +} #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index dfb36a2466370..2aab6bc434f52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -106,6 +106,12 @@ union GRBM_GFX_INDEX_BITS { * @uninitialize: Destroys all the device queue manager resources allocated in * initialize routine. * + * @halt: This routine unmaps queues from the runlist and sets the halt status + * to true so no more queues will be mapped to the runlist until unhalt. + * + * @unhalt: This routine resets the halt status to false and maps queues back + * to the runlist. + * * @create_kernel_queue: Creates kernel queue. Used for debug queue. * * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
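The @halt/@unhalt contract above works because map_queues_cpsch() refuses to submit a runlist while dqm->sched_halt is set; kgd2kfd_stop_sched() and kgd2kfd_start_sched() in kfd_device.c are the intended callers. A small stand-alone model of that interaction (illustrative types only, not the kernel's code):

```c
#include <stdio.h>

/* Stand-in for struct device_queue_manager: only the new flag. */
struct dqm_model {
	int halted;
};

static int halt(struct dqm_model *dqm)
{
	/* unmap all queues, then refuse to map more (see halt_cpsch) */
	dqm->halted = 1;
	return 0;
}

static int unhalt(struct dqm_model *dqm)
{
	/* clear the flag, then replay dynamic queues (see unhalt_cpsch) */
	dqm->halted = 0;
	return 0;
}

static int map_queues(struct dqm_model *dqm)
{
	if (dqm->halted)
		return 0;	/* map_queues_cpsch() bails out the same way */
	printf("runlist submitted\n");
	return 0;
}

int main(void)
{
	struct dqm_model dqm = { 0 };

	halt(&dqm);
	map_queues(&dqm);	/* silently skipped while halted */
	unhalt(&dqm);
	map_queues(&dqm);	/* runs again */
	return 0;
}
```

On unhalt, only the dynamic-queue filter is replayed, which matches unhalt_cpsch() in the hunk above.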
@@ -153,6 +159,8 @@ struct device_queue_manager_ops { int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); + int (*halt)(struct device_queue_manager *dqm); + int (*unhalt)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); @@ -252,7 +260,7 @@ struct device_queue_manager { uint16_t vmid_pasid[VMID_NUM]; uint64_t pipelines_addr; uint64_t fence_gpu_addr; - uint64_t *fence_addr; + volatile uint64_t *fence_addr; struct kfd_mem_obj *fence_mem; bool active_runlist; int sched_policy; @@ -264,6 +272,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + bool sched_halt; /* used for GFX 9.4.3 only */ uint32_t current_logical_xcc_start; @@ -297,6 +306,8 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); +bool check_if_queues_active(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); int reserve_debug_trap_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd); int release_debug_trap_vmid(struct device_queue_manager *dqm, @@ -315,6 +326,14 @@ void set_queue_snapshot_entry(struct queue *q, int debug_lock_and_unmap(struct device_queue_manager *dqm); int debug_map_and_unlock(struct device_queue_manager *dqm); int debug_refresh_runlist(struct device_queue_manager *dqm); +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off, u32 *queue_format); + +void remap_queue(struct device_queue_manager *dqm, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, + uint32_t grace_period); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 05c74887fd6fd..99e0d445ff2d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -103,11 +103,127 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) (void **)&kfd->doorbell_kernel_ptr); } +static void kfd_doorbell_open(struct vm_area_struct *vma) +{ + /* Don't track the parent's PDD in a child process. We do set + * VM_DONTCOPY, but that can be overridden from user mode. + */ + vma->vm_private_data = NULL; +} + +static void kfd_doorbell_close(struct vm_area_struct *vma) +{ + struct kfd_process_device *pdd = vma->vm_private_data; + + if (!pdd) + return; + + mutex_lock(&pdd->qpd.doorbell_lock); + pdd->qpd.doorbell_vma = NULL; + /* Remember if the process was evicted without doorbells + * mapped to user mode. 
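 + * doorbell_mapped is effectively tri-state: 1 while mapped to user
 + * mode, 0 after the PTEs are zapped, and -1 when an eviction happened
 + * with no user mapping in place, so a later mmap can re-apply the
 + * evicted state.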
+ */ + if (pdd->qpd.doorbell_mapped == 0) + pdd->qpd.doorbell_mapped = -1; + mutex_unlock(&pdd->qpd.doorbell_lock); +} + +#ifdef HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG +static vm_fault_t kfd_doorbell_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#else +static int kfd_doorbell_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#endif + struct kfd_process_device *pdd = vma->vm_private_data; + + if (!pdd) + return VM_FAULT_SIGBUS; + + pr_debug("Process %d doorbell vm page fault\n", pdd->process->pasid); + + kfd_process_remap_doorbells_locked(pdd->process); + + kfd_process_schedule_restore(pdd->process); + + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct kfd_doorbell_vm_ops = { + .open = kfd_doorbell_open, + .close = kfd_doorbell_close, + .fault = kfd_doorbell_vm_fault, +}; + +static void kfd_doorbell_unmap_locked(struct kfd_process_device *pdd) +{ + struct kfd_process *process = pdd->process; + struct vm_area_struct *vma; + size_t size; + + vma = pdd->qpd.doorbell_vma; + /* Remember if the process was evicted without doorbells + * mapped to user mode. + */ + if (!vma) { + pdd->qpd.doorbell_mapped = -1; + return; + } + + pr_debug("Process %d unmapping doorbell 0x%lx\n", + process->pasid, vma->vm_start); + + size = kfd_doorbell_process_slice(pdd->dev->kfd); + zap_vma_ptes(vma, vma->vm_start, size); + pdd->qpd.doorbell_mapped = 0; +} + +void kfd_doorbell_unmap(struct kfd_process_device *pdd) +{ + mutex_lock(&pdd->qpd.doorbell_lock); + kfd_doorbell_unmap_locked(pdd); + mutex_unlock(&pdd->qpd.doorbell_lock); +} + +int kfd_doorbell_remap(struct kfd_process_device *pdd) +{ + struct kfd_process *process = pdd->process; + phys_addr_t address; + struct vm_area_struct *vma; + size_t size; + int ret = 0; + + mutex_lock(&pdd->qpd.doorbell_lock); + if (pdd->qpd.doorbell_mapped != 0) + goto out_unlock; + + /* Calculate physical address of doorbell */ + address = kfd_get_process_doorbells(pdd); + vma = pdd->qpd.doorbell_vma; + size = kfd_doorbell_process_slice(pdd->dev->kfd); + + pr_debug("Process %d remap doorbell 0x%lx\n", process->pasid, + vma->vm_start); + + ret = vm_iomap_memory(vma, address, size); + if (ret) + pr_err("Process %d failed to remap doorbell 0x%lx\n", + process->pasid, vma->vm_start); + +out_unlock: + pdd->qpd.doorbell_mapped = 1; + mutex_unlock(&pdd->qpd.doorbell_lock); + + return ret; +} + int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, struct vm_area_struct *vma) { phys_addr_t address; struct kfd_process_device *pdd; + int ret; /* * For simplicitly we only allow mapping of the entire doorbell @@ -129,20 +245,47 @@ int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("Mapping doorbell page\n" + pr_debug("Process %d mapping doorbell page\n" " target user address == 0x%08llX\n" " physical address == 0x%08llX\n" " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", - (unsigned long long) vma->vm_start, address, vma->vm_flags, - kfd_doorbell_process_slice(dev->kfd)); + process->pasid, (unsigned long long) vma->vm_start, + address, vma->vm_flags, kfd_doorbell_process_slice(dev->kfd)); + pdd = kfd_get_process_device_data(dev, process); + if (WARN_ON_ONCE(!pdd)) + return 0; - return io_remap_pfn_range(vma, + mutex_lock(&pdd->qpd.doorbell_lock); + + ret = io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, kfd_doorbell_process_slice(dev->kfd), vma->vm_page_prot); + + if (!ret && keep_idle_process_evicted) { + 
vma->vm_ops = &kfd_doorbell_vm_ops; + vma->vm_private_data = pdd; + pdd->qpd.doorbell_vma = vma; + + /* If process is evicted before the first queue is created, + * process will be restored by the page fault when the + * doorbell is accessed the first time + */ + if (pdd->qpd.doorbell_mapped == -1) { + pr_debug("Process %d evicted, unmapping doorbell\n", + process->pasid); + kfd_doorbell_unmap_locked(pdd); + } else { + pdd->qpd.doorbell_mapped = 1; + } + } + + mutex_unlock(&pdd->qpd.doorbell_lock); + + return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ea37922492093..cd07a9ca76125 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -38,7 +38,11 @@ * Wrapper around wait_queue_entry_t */ struct kfd_event_waiter { +#if defined(HAVE_WAIT_QUEUE_ENTRY) wait_queue_entry_t wait; +#else + wait_queue_t wait; +#endif struct kfd_event *event; /* Event to wait for */ bool activated; /* Becomes true when event is signaled */ bool event_age_enabled; /* set to true when last_event_age is non-zero */ @@ -265,7 +269,11 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) /* Wake up pending waiters. They will return failure */ spin_lock(&ev->lock); +#if !defined(HAVE_WAIT_QUEUE_ENTRY) + list_for_each_entry(waiter, &ev->wq.task_list, wait.task_list) +#else list_for_each_entry(waiter, &ev->wq.head, wait.entry) +#endif WRITE_ONCE(waiter->event, NULL); wake_up_all(&ev->wq); spin_unlock(&ev->lock); @@ -637,7 +645,11 @@ static void set_event(struct kfd_event *ev) WARN_ONCE(1, "event_age wrap back!"); } +#if !defined(HAVE_WAIT_QUEUE_ENTRY) + list_for_each_entry(waiter, &ev->wq.task_list, wait.task_list) +#else list_for_each_entry(waiter, &ev->wq.head, wait.entry) +#endif WRITE_ONCE(waiter->activated, true); wake_up_all(&ev->wq); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 8e0d0356e810c..37b69fe0ede38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -129,63 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE { KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) -static void event_interrupt_poison_consumption(struct kfd_node *dev, - uint16_t pasid, uint16_t client_id) -{ - enum amdgpu_ras_block block = 0; - int old_poison, ret = -EINVAL; - uint32_t reset = 0; - struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); - - if (!p) - return; - - /* all queues of a process will be unmapped in one time */ - old_poison = atomic_cmpxchg(&p->poison, 0, 1); - kfd_unref_process(p); - if (old_poison) - return; - - switch (client_id) { - case SOC15_IH_CLIENTID_SE0SH: - case SOC15_IH_CLIENTID_SE1SH: - case SOC15_IH_CLIENTID_SE2SH: - case SOC15_IH_CLIENTID_SE3SH: - case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); - block = AMDGPU_RAS_BLOCK__GFX; - if (ret) - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - case SOC15_IH_CLIENTID_SDMA0: - case SOC15_IH_CLIENTID_SDMA1: - case SOC15_IH_CLIENTID_SDMA2: - case SOC15_IH_CLIENTID_SDMA3: - case SOC15_IH_CLIENTID_SDMA4: - block = AMDGPU_RAS_BLOCK__SDMA; - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - default: - break; - } - - kfd_signal_poison_consumed_event(dev, pasid); - - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) - dev_warn(dev->adev->dev, - "RAS poison consumption, 
unmap queue flow succeeded: client id %d\n", - client_id); - else - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); -} - static bool event_interrupt_isr_v10(struct kfd_node *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -332,11 +275,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID), sq_intr_err_type); - if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && - sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; - } break; default: break; @@ -362,38 +300,14 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); - } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { - event_interrupt_poison_consumption(dev, pasid, client_id); - return; - } - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f524a55eee116..b3f988b275a88 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -330,11 +330,14 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && - KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) - kfd_set_dbg_ev_from_interrupt(dev, pasid, - KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); + + kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id, KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index a9c3580be8c9b..0cb5c582ce7dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -167,11 +167,24 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SE3SH: case SOC15_IH_CLIENTID_UTCL2: block = AMDGPU_RAS_BLOCK__GFX; - 
if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX); break; case SOC15_IH_CLIENTID_VMC: case SOC15_IH_CLIENTID_VMC1: @@ -184,11 +197,24 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: block = AMDGPU_RAS_BLOCK__SDMA; - if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; - else + if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x00557300 and onwards */ + if (dev->adev->pm.fw_version < 0x00557300) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { + /* driver mode-2 for gfx poison is only supported by + * pmfw 0x05550C00 and onwards */ + if (dev->adev->pm.fw_version < 0x05550C00) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + else + reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA); break; default: dev_warn(dev->adev->dev, @@ -431,25 +457,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c new file mode 100644 index 0000000000000..055a4c9364471 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c @@ -0,0 +1,276 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "kfd_ipc.h" +#include "kfd_priv.h" +#include "amdgpu_amdkfd.h" + +#define KFD_IPC_HASH_TABLE_SIZE_SHIFT 4 +#define KFD_IPC_HASH_TABLE_SIZE_MASK ((1 << KFD_IPC_HASH_TABLE_SIZE_SHIFT) - 1) + +static struct kfd_ipc_handles { + DECLARE_HASHTABLE(handles, KFD_IPC_HASH_TABLE_SIZE_SHIFT); + struct mutex lock; +} kfd_ipc_handles; + +/* Since handles are random numbers, they can be used directly as hashing keys. + * The lowest 4 bits of the handle are used as the key. However, during import + * all 128 bits of the handle are checked to prevent handle snooping. + */ +#define HANDLE_TO_KEY(sh) ((*(uint64_t *)sh) & KFD_IPC_HASH_TABLE_SIZE_MASK) + +int kfd_ipc_store_insert(struct dma_buf *dmabuf, struct kfd_ipc_obj **ipc_obj, + uint32_t flags, uint32_t *restore_handle) +{ + struct kfd_ipc_obj *obj; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + if (restore_handle) + memcpy(obj->share_handle, restore_handle, sizeof(obj->share_handle)); + else + get_random_bytes(obj->share_handle, sizeof(obj->share_handle)); + + mutex_lock(&kfd_ipc_handles.lock); + if (restore_handle) { + struct kfd_ipc_obj *entry; + + /* When doing CRIU restore, we may have a race condition where two processes try + * to insert handles with the same key. Make sure this handle does not already exist. + */ + hlist_for_each_entry(entry, + &kfd_ipc_handles.handles[HANDLE_TO_KEY(obj->share_handle)], node) { + if (!memcmp(entry->share_handle, + obj->share_handle, + sizeof(entry->share_handle))) { + mutex_unlock(&kfd_ipc_handles.lock); + kfree(obj); + return -EINVAL; + } + } + } + + /* The initial ref belongs to the allocator process. + * The IPC object store itself does not hold a ref since + * there is no specific moment in time where that ref should + * be dropped, except "when there are no more userspace processes + * holding a ref to the object". Therefore the removal from IPC + * storage happens at ipc_obj release time.
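 + * Importers take their own reference via ipc_obj_get(); once the last
 + * kref is dropped, ipc_obj_release() unhashes the handle and puts the
 + * dma-buf.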
+ */ + kref_init(&obj->ref); + obj->dmabuf = dmabuf; + obj->flags = flags; + + hlist_add_head(&obj->node, &kfd_ipc_handles.handles[HANDLE_TO_KEY(obj->share_handle)]); + mutex_unlock(&kfd_ipc_handles.lock); + + if (ipc_obj) + *ipc_obj = obj; + + return 0; +} + +static void ipc_obj_release(struct kref *r) +{ + struct kfd_ipc_obj *obj; + + obj = container_of(r, struct kfd_ipc_obj, ref); + + mutex_lock(&kfd_ipc_handles.lock); + hash_del(&obj->node); + mutex_unlock(&kfd_ipc_handles.lock); + + dma_buf_put(obj->dmabuf); + kfree(obj); +} + +static struct kfd_ipc_obj *ipc_obj_get(struct kfd_ipc_obj *obj) +{ + if (kref_get_unless_zero(&obj->ref)) + return obj; + return NULL; +} + +void kfd_ipc_obj_put(struct kfd_ipc_obj **obj) +{ + if (*obj) { + kref_put(&(*obj)->ref, ipc_obj_release); + *obj = NULL; + } +} + +int kfd_ipc_init(void) +{ + mutex_init(&kfd_ipc_handles.lock); + hash_init(kfd_ipc_handles.handles); + return 0; +} + +static int kfd_import_dmabuf_create_kfd_bo(struct kfd_node *dev, + struct kfd_process *p, + uint32_t gpu_id, + struct dma_buf *dmabuf, struct kfd_ipc_obj *ipc_obj, + uint64_t va_addr, uint64_t *handle, + uint64_t *mmap_offset, bool restore) +{ + int r; + struct kgd_mem *mem; + uint64_t size; + int idr_handle; + struct kfd_process_device *pdd = NULL; + + if (!handle) + return -EINVAL; + + if (!dev) + return -EINVAL; + + if (restore) + idr_handle = GET_IDR_HANDLE(*handle); + else + idr_handle = -1; + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) + return PTR_ERR(pdd); + + r = amdgpu_amdkfd_gpuvm_import_ipcobj(dev->adev, dmabuf, ipc_obj, + va_addr, pdd->drm_priv, + &mem, &size, mmap_offset); + if (r) + return r; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem, + va_addr, size, 0, 0, idr_handle); + if (idr_handle < 0) { + r = -EFAULT; + goto err_free; + } + + *handle = MAKE_HANDLE(gpu_id, idr_handle); + + return 0; + +err_free: + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, pdd->drm_priv, NULL); + return r; +} + +int kfd_ipc_import_handle(struct kfd_node *dev, struct kfd_process *p, + uint32_t gpu_id, uint32_t *share_handle, + uint64_t va_addr, uint64_t *handle, + uint64_t *mmap_offset, uint32_t *pflags, bool restore) +{ + int r; + struct kfd_ipc_obj *entry, *found = NULL; + + mutex_lock(&kfd_ipc_handles.lock); + /* Convert the user provided handle to hash key and search only in that + * bucket + */ + hlist_for_each_entry(entry, + &kfd_ipc_handles.handles[HANDLE_TO_KEY(share_handle)], node) { + if (!memcmp(entry->share_handle, share_handle, + sizeof(entry->share_handle))) { + found = ipc_obj_get(entry); + break; + } + } + mutex_unlock(&kfd_ipc_handles.lock); + + if (!found) + return -EINVAL; + + pr_debug("Found ipc_dma_buf: %p\n", found->dmabuf); + + if (!restore) { + mutex_lock(&p->mutex); + } + + r = kfd_import_dmabuf_create_kfd_bo(dev, p, gpu_id, + found->dmabuf, found, + va_addr, handle, mmap_offset, + restore); + if (!restore) { + mutex_unlock(&p->mutex); + } + if (r) + goto error_unref; + + if (pflags) + *pflags = found->flags; + + return r; + +error_unref: + kfd_ipc_obj_put(&found); + return r; +} + +int kfd_ipc_export_as_handle(struct kfd_node *dev, struct kfd_process *p, + uint64_t handle, uint32_t *ipc_handle, + uint32_t flags) +{ + struct kfd_process_device *pdd = NULL; + struct kfd_ipc_obj *ipc_obj; + struct kfd_bo *kfd_bo = NULL; + struct kgd_mem *mem; + int r; + + if (!dev || !ipc_handle) + return -EINVAL; + + /* Protect kgd_mem object from being deleted by another thread */ + mutex_lock(&p->mutex); + pdd = 
kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + pr_err("Failed to get pdd\n"); + r = PTR_ERR(pdd); + goto unlock; + } + + kfd_bo = kfd_process_device_find_bo(pdd, GET_IDR_HANDLE(handle)); + + if (!kfd_bo) { + pr_err("Failed to get bo"); + r = -EINVAL; + goto unlock; + } + mem = (struct kgd_mem *)kfd_bo->mem; + + r = amdgpu_amdkfd_gpuvm_export_ipc_obj(dev->adev, pdd->drm_priv, mem, + &ipc_obj, flags, NULL); + if (r) + goto unlock; + + memcpy(ipc_handle, ipc_obj->share_handle, + sizeof(ipc_obj->share_handle)); + +unlock: + mutex_unlock(&p->mutex); + return r; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h new file mode 100644 index 0000000000000..ade507630818d --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.h @@ -0,0 +1,54 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef KFD_IPC_H_ +#define KFD_IPC_H_ + +#include +#include + +/* avoid including kfd_priv.h */ +struct kfd_node; +struct kfd_process; + +struct kfd_ipc_obj { + struct hlist_node node; + struct kref ref; + struct dma_buf *dmabuf; + uint32_t share_handle[4]; + uint32_t flags; +}; + +int kfd_ipc_import_handle(struct kfd_node *dev, struct kfd_process *p, + uint32_t gpu_id, uint32_t *share_handle, + uint64_t va_addr, uint64_t *handle, + uint64_t *mmap_offset, uint32_t *pflags, bool restore); +int kfd_ipc_export_as_handle(struct kfd_node *dev, struct kfd_process *p, + uint64_t handle, uint32_t *ipc_handle, + uint32_t flags); + +int kfd_ipc_store_insert(struct dma_buf *dmabuf, struct kfd_ipc_obj **ipc_obj, + uint32_t flags, uint32_t *restore_handle); +void kfd_ipc_obj_put(struct kfd_ipc_obj **obj); + +#endif /* KFD_IPC_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 4843dcb9a5f79..55d18aed257bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -306,12 +306,17 @@ int kq_submit_packet(struct kernel_queue *kq) if (amdgpu_amdkfd_is_fed(kq->dev->adev)) return -EIO; + /* Make sure ring buffer is updated before wptr updated */ + mb(); + if (kq->dev->kfd->device_info.doorbell_size == 8) { *kq->wptr64_kernel = kq->pending_wptr64; + mb(); /* Make sure wptr updated before ring doorbell */ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, kq->pending_wptr64); } else { *kq->wptr_kernel = kq->pending_wptr; + mb(); /* Make sure wptr updated before ring doorbell */ write_kernel_doorbell(kq->queue->properties.doorbell_ptr, kq->pending_wptr); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 8ee3d07ffbdfa..297d365b57bc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -206,7 +206,11 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr) { +#ifdef HAVE_DEV_PAGEMAP_RANGE return (addr + adev->kfd.pgmap.range.start) >> PAGE_SHIFT; +#else + return (addr + adev->kfd.dev->pgmap.res.start) >> PAGE_SHIFT; +#endif } static void @@ -236,7 +240,12 @@ svm_migrate_addr(struct amdgpu_device *adev, struct page *page) unsigned long addr; addr = page_to_pfn(page) << PAGE_SHIFT; +#ifdef HAVE_DEV_PAGEMAP_RANGE return (addr - adev->kfd.pgmap.range.start); +#else + return (addr - adev->kfd.dev->pgmap.res.start); +#endif + } static struct page * @@ -302,6 +311,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); svm_migrate_get_vram_page(prange, migrate->dst[i]); migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { @@ -398,8 +408,12 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, migrate.vma = vma; migrate.start = start; migrate.end = end; +#ifdef HAVE_MIGRATE_VMA_PGMAP_OWNER migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); +#elif defined(HAVE_DEV_PAGEMAP_OWNER) + migrate.src_owner = NULL; +#endif buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), @@ -641,6 +655,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, dst[i] 
>> PAGE_SHIFT, page_to_pfn(dpage)); migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; j++; } @@ -698,11 +713,17 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, migrate.vma = vma; migrate.start = start; migrate.end = end; +#ifdef HAVE_MIGRATE_VMA_PGMAP_OWNER migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); +#ifdef HAVE_DEVICE_COHERENT if (adev->gmc.xgmi.connected_to_cpu) migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT; else +#endif migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; +#elif defined(HAVE_DEV_PAGEMAP_OWNER) + migrate.src_owner = SVM_ADEV_PGMAP_OWNER(adev); +#endif buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), @@ -712,7 +733,9 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, migrate.src = buf; migrate.dst = migrate.src + npages; +#ifdef HAVE_MIGRATE_VMA_FAULT_PAGE migrate.fault_page = fault_page; +#endif scratch = (dma_addr_t *)(migrate.dst + npages); kfd_smi_event_migration_start(node, p->lead_thread->pid, @@ -1033,23 +1056,40 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev) * should remove reserved size */ size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20); +#ifdef HAVE_DEVICE_COHERENT if (adev->gmc.xgmi.connected_to_cpu) { +#ifdef HAVE_DEV_PAGEMAP_RANGE + pgmap->nr_range = 1; pgmap->range.start = adev->gmc.aper_base; pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1; +#else + pgmap->res.start = adev->gmc.aper_base; + pgmap->res.end = adev->gmc.aper_base + adev->gmc.aper_size - 1; +#endif pgmap->type = MEMORY_DEVICE_COHERENT; - } else { + } else +#endif + { res = devm_request_free_mem_region(adev->dev, &iomem_resource, size); if (IS_ERR(res)) return PTR_ERR(res); +#ifdef HAVE_DEV_PAGEMAP_RANGE + pgmap->nr_range = 1; pgmap->range.start = res->start; pgmap->range.end = res->end; +#else + pgmap->res.start = res->start; + pgmap->res.end = res->end; +#endif pgmap->type = MEMORY_DEVICE_PRIVATE; } - pgmap->nr_range = 1; pgmap->ops = &svm_migrate_pgmap_ops; +#ifdef HAVE_DEV_PAGEMAP_OWNER pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); +#endif pgmap->flags = 0; + /* Device manager releases device-specific resources, memory region and * pgmap when driver disconnects from device. 
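 * devm releases them in reverse order of allocation on driver detach,
 * so no explicit teardown of the region or pgmap is needed here.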
*/ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index aee2212e52f69..5f8093e03d340 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -53,10 +53,16 @@ static int kfd_init(void) if (err < 0) goto err_topology; + err = kfd_ipc_init(); + if (err < 0) + goto err_ipc; + err = kfd_process_create_wq(); if (err < 0) goto err_create_wq; + kfd_init_peer_direct(); + /* Ignore the return value, so that we can continue * to init the KFD, even if procfs isn't craated */ @@ -67,6 +73,7 @@ static int kfd_init(void) return 0; err_create_wq: +err_ipc: kfd_topology_shutdown(); err_topology: kfd_chardev_exit(); @@ -79,6 +86,7 @@ static void kfd_exit(void) { kfd_cleanup_processes(); kfd_debugfs_fini(); + kfd_close_peer_direct(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); kfd_topology_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 17cc1f25c8d08..876cc71473293 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -102,6 +102,8 @@ struct mqd_manager { u32 *ctl_stack_used_size, u32 *save_area_used_size); + bool (*check_queue_active)(struct queue *q); + void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size); void (*checkpoint_mqd)(struct mqd_manager *mm, @@ -115,7 +117,6 @@ struct mqd_manager { const void *mqd_src, const void *ctl_stack_src, const u32 ctl_stack_size); - #if defined(CONFIG_DEBUG_FS) int (*debugfs_show_mqd)(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 05f3ac2eaef9e..64adbde8648af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -31,6 +31,7 @@ #include "cik_regs.h" #include "cik_structs.h" #include "oss/oss_2_4_sh_mask.h" +#include "gca/gfx_7_2_sh_mask.h" static inline struct cik_mqd *get_mqd(void *mqd) { @@ -42,6 +43,31 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) return (struct cik_sdma_rlc_registers *)mqd; } +static bool check_sdma_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + struct cik_sdma_rlc_registers *m = get_sdma_mqd(q->mqd); + + rptr = m->sdma_rlc_rb_rptr; + wptr = m->sdma_rlc_rb_wptr; + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + + return (rptr != wptr); +} + +static bool check_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + struct cik_mqd *m = get_mqd(q->mqd); + + rptr = m->cp_hqd_pq_rptr; + wptr = m->cp_hqd_pq_wptr; + + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + + return (rptr != wptr); +} + static void update_cu_mask(struct mqd_manager *mm, void *mqd, struct mqd_update_info *minfo) { @@ -199,6 +225,9 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + if (priv_cp_queues) + m->cp_hqd_pq_control |= + 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT; update_cu_mask(mm, mqd, minfo); set_priority(m, q); @@ -403,6 +432,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->checkpoint_mqd = checkpoint_mqd; mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct cik_mqd); @@ -418,6 +448,7 @@ struct mqd_manager 
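The err_ipc label added to kfd_init() sits directly above err_create_wq's target: kfd_ipc_init() needs no dedicated teardown, so a failure in it (or in the step after it) only unwinds the stages initialized earlier. The general shape of this goto-unwind ladder, as a sketch with placeholder subsystems:

	static int demo_init(void)
	{
		int err;

		err = a_init();		/* placeholder subsystems */
		if (err)
			return err;
		err = b_init();
		if (err)
			goto err_b;
		err = c_init();
		if (err)
			goto err_c;
		return 0;

	err_c:
		b_exit();		/* tear down in reverse init order */
	err_b:
		a_exit();
		return err;
	}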
*mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->mqd_size = sizeof(struct cik_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) @@ -447,6 +478,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->check_queue_active = check_sdma_queue_active; mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma; mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a44234..2b72d5b4949b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); @@ -340,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 84e8ea3a8a0c9..4061f36db1dc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -59,6 +59,49 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } +static bool check_sdma_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + uint32_t rptr_hi, wptr_hi; + struct v9_sdma_mqd *m = get_sdma_mqd(q->mqd); + + rptr = m->sdmax_rlcx_rb_rptr; + wptr = m->sdmax_rlcx_rb_wptr; + rptr_hi = m->sdmax_rlcx_rb_rptr_hi; + wptr_hi = m->sdmax_rlcx_rb_wptr_hi; + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + pr_debug("rptr_hi=%d, wptr_hi=%d\n", rptr_hi, wptr_hi); + + return (rptr != wptr || rptr_hi != wptr_hi); +} + +static bool check_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + uint32_t cntl_stack_offset, cntl_stack_size; + struct v9_mqd *m = get_mqd(q->mqd); + + rptr = m->cp_hqd_pq_rptr; + wptr = m->cp_hqd_pq_wptr_lo % q->properties.queue_size; + cntl_stack_offset = m->cp_hqd_cntl_stack_offset; + cntl_stack_size = m->cp_hqd_cntl_stack_size; + + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + pr_debug("m->cp_hqd_cntl_stack_offset=0x%08x\n", cntl_stack_offset); + pr_debug("m->cp_hqd_cntl_stack_size=0x%08x\n", cntl_stack_size); + + if ((rptr == 0 && wptr == 0) || + cntl_stack_offset == 0xffffffff || + cntl_stack_size > 0x5000) + return false; + + /* Process is idle if both conditions are meet: + * queue's rptr equals to wptr + * control stack is empty, cntl_stack_offset = cntl_stack_size + */ + return (rptr != wptr || cntl_stack_offset != cntl_stack_size); +} + static void update_cu_mask(struct 
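The GFX9 check_queue_active() above treats a queue as busy when either the ring still has unread packets (rptr != wptr) or the CWSR control stack still holds saved wave state (the offset has not grown back to the stack size); clearly implausible MQD contents short-circuit to "inactive". Restated as a standalone predicate (a sketch, not driver code):

	/* Sketch of the GFX9 idle test with plain parameters. */
	static bool demo_queue_idle(u32 rptr, u32 wptr, u32 cs_off, u32 cs_size)
	{
		/* Implausible MQD contents: report idle rather than block. */
		if ((rptr == 0 && wptr == 0) ||
		    cs_off == 0xffffffff || cs_size > 0x5000)
			return true;

		/* Idle only when the ring is drained AND the stack is empty. */
		return rptr == wptr && cs_off == cs_size;
	}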
mqd_manager *mm, void *mqd, struct mqd_update_info *minfo, uint32_t inst) { @@ -140,7 +183,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, NUM_XCC(node->xcc_mask), &(mqd_mem_obj->gtt_mem), &(mqd_mem_obj->gpu_addr), - (void *)&(mqd_mem_obj->cpu_ptr), true); + (void *)&(mqd_mem_obj->cpu_ptr), true, true); if (retval) { kfree(mqd_mem_obj); @@ -297,6 +340,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; } + if (priv_cp_queues) + m->cp_hqd_pq_control |= + 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT; if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = 0; @@ -876,6 +922,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->allocate_mqd = allocate_mqd; mqd->free_mqd = kfd_free_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->get_checkpoint_info = get_checkpoint_info; mqd->checkpoint_mqd = checkpoint_mqd; mqd->restore_mqd = restore_mqd; @@ -904,6 +951,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->free_mqd = free_mqd_hiq_sdma; mqd->update_mqd = update_mqd; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->mqd_size = sizeof(struct v9_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) @@ -930,6 +978,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; @@ -943,6 +992,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->check_queue_active = check_sdma_queue_active; mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma; mqd->mqd_size = sizeof(struct v9_sdma_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index c1fafc5025158..23669e908d504 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -45,6 +45,45 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) return (struct vi_sdma_mqd *)mqd; } +static bool check_sdma_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + struct vi_sdma_mqd *m = get_sdma_mqd(q->mqd); + + rptr = m->sdmax_rlcx_rb_rptr; + wptr = m->sdmax_rlcx_rb_wptr; + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + + return (rptr != wptr); +} + +static bool check_queue_active(struct queue *q) +{ + uint32_t rptr, wptr; + uint32_t cntl_stack_offset, cntl_stack_size; + struct vi_mqd *m = get_mqd(q->mqd); + + rptr = m->cp_hqd_pq_rptr; + wptr = m->cp_hqd_pq_wptr; + cntl_stack_offset = m->cp_hqd_cntl_stack_offset; + cntl_stack_size = m->cp_hqd_cntl_stack_size; + + pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); + pr_debug("m->cp_hqd_cntl_stack_offset=0x%08x\n", cntl_stack_offset); + pr_debug("m->cp_hqd_cntl_stack_size=0x%08x\n", cntl_stack_size); + + if ((rptr == 0 && wptr == 0) || + cntl_stack_offset == 0xffffffff || + cntl_stack_size > 0x5000) + return false; + + /* Process is idle if both conditions are meet: + * queue's rptr equals to wptr + * control stack is empty, cntl_stack_offset = 
cntl_stack_size + */ + return (rptr != wptr || cntl_stack_offset != cntl_stack_size); +} + static void update_cu_mask(struct mqd_manager *mm, void *mqd, struct mqd_update_info *minfo) { @@ -225,7 +264,9 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } - + if (priv_cp_queues) + m->cp_hqd_pq_control |= + 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT; if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) m->cp_hqd_ctx_save_control = atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | @@ -461,6 +502,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; mqd->get_wave_state = get_wave_state; + mqd->check_queue_active = check_queue_active; mqd->get_checkpoint_info = get_checkpoint_info; mqd->checkpoint_mqd = checkpoint_mqd; mqd->restore_mqd = restore_mqd; @@ -477,6 +519,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->mqd_size = sizeof(struct vi_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) @@ -492,6 +535,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; + mqd->check_queue_active = check_queue_active; mqd->mqd_size = sizeof(struct vi_mqd); mqd->mqd_stride = kfd_mqd_stride; #if defined(CONFIG_DEBUG_FS) @@ -506,6 +550,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = kfd_destroy_mqd_sdma; mqd->is_occupied = kfd_is_occupied_sdma; + mqd->check_queue_active = check_sdma_queue_active; mqd->checkpoint_mqd = checkpoint_mqd_sdma; mqd->restore_mqd = restore_mqd_sdma; mqd->mqd_size = sizeof(struct vi_sdma_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 00776f08351c3..1f9f5bfeaf868 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -37,11 +37,14 @@ static int pm_map_process_v9(struct packet_manager *pm, struct kfd_node *kfd = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); + if (adev->enforce_isolation[kfd->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -89,14 +92,18 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_node *knode = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); int i; + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); + if (adev->enforce_isolation[knode->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -144,17 +151,22 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, int concurrent_proc_cnt = 0; struct kfd_node *kfd = pm->dqm->dev; + struct amdgpu_device *adev = kfd->adev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number * of processes in the runlist and kfd module parameter * hws_max_conc_proc. + * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs + * cleaner between process switch), enable single-process mode + * in HWS. * Note: the arbitration between the number of VMIDs and * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = min(pm->dqm->processes_count, + concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c new file mode 100644 index 0000000000000..c829676d631a6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
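When enforce_isolation is set for a node, the two changes above work together: MAP_PROCESS carries exec_cleaner_shader so LDS/VGPR/SGPR state is scrubbed at process switch, and the runlist is built for one process at a time. The concurrency selection, condensed into a sketch:

	/* Sketch of concurrent-process selection under isolation. */
	static u32 demo_conc_proc_cnt(bool isolate, u32 processes_count,
				      u32 max_proc_per_quantum)
	{
		if (isolate)	/* cleaner shader runs between processes */
			return 1;
		return min(processes_count, max_proc_per_quantum);
	}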
+ */ + +#include "kfd_priv.h" +#include "amdgpu_amdkfd.h" +#include "kfd_pc_sampling.h" +#include "kfd_debug.h" +#include "kfd_device_queue_manager.h" + +/* + * PC Sampling revision change log + * + * 0.1 - Initial revision + * 0.2 - Support gfx9_4_3 Host Trap PC sampling + */ +#define KFD_IOCTL_PCS_MAJOR_VERSION 0 +#define KFD_IOCTL_PCS_MINOR_VERSION 2 + +struct supported_pc_sample_info { + uint32_t ip_version; + const struct kfd_pc_sample_info *sample_info; +}; + +const struct kfd_pc_sample_info sample_info_hosttrap_9_0_0 = { + 0, 1, ~0ULL, 0, KFD_IOCTL_PCS_METHOD_HOSTTRAP, KFD_IOCTL_PCS_TYPE_TIME_US }; + +struct supported_pc_sample_info supported_formats[] = { + { IP_VERSION(9, 4, 2), &sample_info_hosttrap_9_0_0 }, + { IP_VERSION(9, 4, 3), &sample_info_hosttrap_9_0_0 }, +}; + +static int kfd_pc_sample_thread(void *param) +{ + struct amdgpu_device *adev; + struct kfd_node *node = param; + uint32_t timeout = 0; + ktime_t next_trap_time; + bool need_wait; + uint32_t inst; + + mutex_lock(&node->pcs_data.mutex); + if (node->pcs_data.hosttrap_entry.base.active_count && + node->pcs_data.hosttrap_entry.base.pc_sample_info.interval && + node->kfd2kgd->trigger_pc_sample_trap) { + switch (node->pcs_data.hosttrap_entry.base.pc_sample_info.type) { + case KFD_IOCTL_PCS_TYPE_TIME_US: + timeout = (uint32_t)node->pcs_data.hosttrap_entry.base.pc_sample_info.interval; + break; + default: + pr_debug("PC Sampling type %d not supported.", + node->pcs_data.hosttrap_entry.base.pc_sample_info.type); + } + } + mutex_unlock(&node->pcs_data.mutex); + if (!timeout) + return -EINVAL; + + adev = node->adev; + need_wait = false; + allow_signal(SIGKILL); + + if (node->kfd2kgd->override_core_cg) + for_each_inst(inst, node->xcc_mask) + node->kfd2kgd->override_core_cg(adev, 1, inst); + + while (!kthread_should_stop() && + !signal_pending(node->pcs_data.hosttrap_entry.base.pc_sample_thread)) { + if (!need_wait) { + next_trap_time = ktime_add_us(ktime_get_raw(), timeout); + + for_each_inst(inst, node->xcc_mask) { + node->kfd2kgd->trigger_pc_sample_trap(adev, node->vm_info.last_vmid_kfd, + &node->pcs_data.hosttrap_entry.base.target_simd, + &node->pcs_data.hosttrap_entry.base.target_wave_slot, + node->pcs_data.hosttrap_entry.base.pc_sample_info.method, + inst); + } + pr_debug_ratelimited("triggered a host trap."); + need_wait = true; + } else { + ktime_t wait_time; + s64 wait_ns, wait_us; + + wait_time = ktime_sub(next_trap_time, ktime_get_raw()); + wait_ns = ktime_to_ns(wait_time); + wait_us = ktime_to_us(wait_time); + if (wait_ns >= 10000) { + usleep_range(wait_us - 10, wait_us); + } else { + schedule(); + if (wait_ns <= 0) + need_wait = false; + } + } + } + if (node->kfd2kgd->override_core_cg) + for_each_inst(inst, node->xcc_mask) + node->kfd2kgd->override_core_cg(adev, 0, inst); + + node->pcs_data.hosttrap_entry.base.target_simd = 0; + node->pcs_data.hosttrap_entry.base.target_wave_slot = 0; + node->pcs_data.hosttrap_entry.base.pc_sample_thread = NULL; + + return 0; +} + +static int kfd_pc_sample_thread_start(struct kfd_node *node) +{ + char thread_name[16]; + int ret = 0; + + snprintf(thread_name, 16, "pcs_%d", node->adev->ddev.render->index); + node->pcs_data.hosttrap_entry.base.pc_sample_thread = + kthread_run(kfd_pc_sample_thread, node, thread_name); + + if (IS_ERR(node->pcs_data.hosttrap_entry.base.pc_sample_thread)) { + ret = PTR_ERR(node->pcs_data.hosttrap_entry.base.pc_sample_thread); + node->pcs_data.hosttrap_entry.base.pc_sample_thread = NULL; + pr_debug("Failed to create pc sample thread for %s with ret = %d.", 
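kfd_pc_sample_thread() is in effect a software interval timer: each pass records the next trigger time with ktime, fires the host trap on every XCC instance, then sleeps off the remainder with usleep_range(), falling back to schedule() inside the last ~10 us. A simplified restatement of the wait logic; trigger_once() and interval_us stand in for the per-instance trap calls and the configured interval:

	/* Simplified periodic-trigger loop (names are placeholders). */
	while (!kthread_should_stop()) {
		ktime_t next = ktime_add_us(ktime_get_raw(), interval_us);
		s64 wait_ns;

		trigger_once();
		while ((wait_ns = ktime_to_ns(ktime_sub(next,
						ktime_get_raw()))) > 0) {
			if (wait_ns >= 10000)
				usleep_range(wait_ns / 1000 - 10,
					     wait_ns / 1000);
			else
				schedule();	/* sub-10us: just yield */
		}
	}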
+ thread_name, ret); + } + + return ret; +} + +static int kfd_pc_sample_query_cap(struct kfd_process_device *pdd, + struct kfd_ioctl_pc_sample_args __user *user_args) +{ + uint64_t sample_offset; + int num_method = 0; + int ret; + int i; + const uint32_t user_num_sample_info = user_args->num_sample_info; + + /* use version field to pass back pc sampling revision temporarily, not for upstream */ + user_args->version = KFD_IOCTL_PCS_MAJOR_VERSION << 16 | KFD_IOCTL_PCS_MINOR_VERSION; + + for (i = 0; i < ARRAY_SIZE(supported_formats); i++) + if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version) + num_method++; + + if (!num_method) { + pr_debug("PC Sampling not supported on GC_HWIP:0x%x.", + pdd->dev->adev->ip_versions[GC_HWIP][0]); + return -EOPNOTSUPP; + } + + ret = 0; + mutex_lock(&pdd->dev->pcs_data.mutex); + if (user_args->flags != KFD_IOCTL_PCS_QUERY_TYPE_FULL && + pdd->dev->pcs_data.hosttrap_entry.base.use_count) { + /* If we already have a session, restrict returned list to current method */ + user_args->num_sample_info = 1; + + if (user_args->sample_info_ptr) + ret = copy_to_user((void __user *) user_args->sample_info_ptr, + &pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info, + sizeof(struct kfd_pc_sample_info)); + mutex_unlock(&pdd->dev->pcs_data.mutex); + return ret ? -EFAULT : 0; + } + mutex_unlock(&pdd->dev->pcs_data.mutex); + + user_args->num_sample_info = num_method; + + if (!user_args->sample_info_ptr || !user_num_sample_info) { + /* + * User application is querying the size of buffer needed. Application will + * allocate required buffer size and send a second query. + */ + return 0; + } else if (user_num_sample_info < num_method) { + pr_debug("ASIC requires space for %d kfd_pc_sample_info entries.", num_method); + return -ENOSPC; + } + + sample_offset = user_args->sample_info_ptr; + for (i = 0; i < ARRAY_SIZE(supported_formats); i++) { + if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version) { + ret = copy_to_user((void __user *) sample_offset, + supported_formats[i].sample_info, sizeof(struct kfd_pc_sample_info)); + if (ret) { + pr_debug("Failed to copy PC sampling info to user."); + return -EFAULT; + } + sample_offset += sizeof(struct kfd_pc_sample_info); + } + } + + return 0; +} + +static int kfd_pc_sample_start(struct kfd_process_device *pdd, + struct pc_sampling_entry *pcs_entry) +{ + bool pc_sampling_start = false; + int ret = 0; + + pcs_entry->enabled = true; + mutex_lock(&pdd->dev->pcs_data.mutex); + + kfd_process_set_trap_pc_sampling_flag(&pdd->qpd, + pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, true); + + if (!pdd->dev->pcs_data.hosttrap_entry.base.active_count) + pc_sampling_start = true; + + pdd->dev->pcs_data.hosttrap_entry.base.active_count++; + mutex_unlock(&pdd->dev->pcs_data.mutex); + + while (pc_sampling_start) { + /* true means pc_sample_thread stop is in progress */ + if (READ_ONCE(pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_thread)) { + usleep_range(1000, 2000); + } else { + ret = kfd_pc_sample_thread_start(pdd->dev); + break; + } + } + + return ret; +} + +static int kfd_pc_sample_stop(struct kfd_process_device *pdd, + struct pc_sampling_entry *pcs_entry) +{ + bool pc_sampling_stop = false; + + pcs_entry->enabled = false; + mutex_lock(&pdd->dev->pcs_data.mutex); + pdd->dev->pcs_data.hosttrap_entry.base.active_count--; + if (!pdd->dev->pcs_data.hosttrap_entry.base.active_count) + pc_sampling_stop = true; + + mutex_unlock(&pdd->dev->pcs_data.mutex); + + kfd_process_set_trap_pc_sampling_flag(&pdd->qpd, + 
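kfd_pc_sample_query_cap() implements the usual two-call sizing handshake: a first call with a zero count (or NULL buffer) only reports num_sample_info, then the caller allocates and repeats. A hypothetical userspace sketch; the AMDKFD_IOC_PC_SAMPLE request macro name is assumed here, not taken from this patch:

	/* Hypothetical userspace flow for the capability query. */
	struct kfd_ioctl_pc_sample_args args = {
		.op = KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES,
	};
	struct kfd_pc_sample_info *info;

	ioctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);	/* 1st: learn count */

	info = calloc(args.num_sample_info, sizeof(*info));
	args.sample_info_ptr = (uint64_t)(uintptr_t)info;
	ioctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);	/* 2nd: fill buffer */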
pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, false); + remap_queue(pdd->dev->dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + + if (pc_sampling_stop) + kthread_stop(pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_thread); + + return 0; +} + +static int kfd_pc_sample_create(struct kfd_process_device *pdd, + struct kfd_ioctl_pc_sample_args __user *user_args) +{ + struct kfd_pc_sample_info *supported_format = NULL; + struct kfd_pc_sample_info user_info; + struct pc_sampling_entry *pcs_entry; + int ret; + int i; + + if (user_args->num_sample_info != 1) + return -EINVAL; + + ret = copy_from_user(&user_info, (void __user *) user_args->sample_info_ptr, + sizeof(struct kfd_pc_sample_info)); + if (ret) { + pr_debug("Failed to copy PC sampling info from user\n"); + return -EFAULT; + } + + if (user_info.flags & KFD_IOCTL_PCS_FLAG_POWER_OF_2 && + user_info.interval & (user_info.interval - 1)) { + pr_debug("Sampling interval's power is unmatched!"); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(supported_formats); i++) { + if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version + && user_info.method == supported_formats[i].sample_info->method + && user_info.type == supported_formats[i].sample_info->type + && user_info.interval <= supported_formats[i].sample_info->interval_max + && user_info.interval >= supported_formats[i].sample_info->interval_min) { + supported_format = + (struct kfd_pc_sample_info *)supported_formats[i].sample_info; + break; + } + } + + if (!supported_format) { + pr_debug("Sampling format is not supported!"); + return -EOPNOTSUPP; + } + + mutex_lock(&pdd->dev->pcs_data.mutex); + if (pdd->dev->pcs_data.hosttrap_entry.base.use_count && + memcmp(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info, + &user_info, sizeof(user_info))) { + ret = copy_to_user((void __user *) user_args->sample_info_ptr, + &pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info, + sizeof(struct kfd_pc_sample_info)); + mutex_unlock(&pdd->dev->pcs_data.mutex); + return ret ? -EFAULT : -EEXIST; + } + + pcs_entry = kzalloc(sizeof(*pcs_entry), GFP_KERNEL); + if (!pcs_entry) { + mutex_unlock(&pdd->dev->pcs_data.mutex); + return -ENOMEM; + } + + i = idr_alloc_cyclic(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_idr, + pcs_entry, 1, 0, GFP_KERNEL); + if (i < 0) { + mutex_unlock(&pdd->dev->pcs_data.mutex); + kfree(pcs_entry); + return i; + } + + if (!pdd->dev->pcs_data.hosttrap_entry.base.use_count) + pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info = user_info; + + pdd->dev->pcs_data.hosttrap_entry.base.use_count++; + mutex_unlock(&pdd->dev->pcs_data.mutex); + + pcs_entry->pdd = pdd; + user_args->trace_id = (uint32_t)i; + + /* + * Set SPI_GDBG_PER_VMID_CNTL.TRAP_EN so that TTMP registers are valid in the sampling data + * p->runtime_info.ttmp_setup will be cleared when user application calls runtime_disable + * on exit. 
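The KFD_IOCTL_PCS_FLAG_POWER_OF_2 validation relies on the classic x & (x - 1) identity: subtracting one flips the lowest set bit and all bits below it, so the AND is zero exactly when at most one bit was set. Spelled out:

	/* x & (x - 1) clears the lowest set bit:
	 *   0x1000 & 0x0fff == 0       -> power of two, accepted
	 *   0x1800 & 0x17ff == 0x1000  -> two bits set, rejected
	 */
	static inline bool demo_is_pow2(u64 x)
	{
		return x != 0 && (x & (x - 1)) == 0;
	}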
+ */ + kfd_dbg_enable_ttmp_setup(pdd->process); + pdd->process->pc_sampling_ref++; + + pr_debug("alloc pcs_entry = %p, trace_id = 0x%x on gpu 0x%x", pcs_entry, i, pdd->dev->id); + + return 0; +} + +static int kfd_pc_sample_destroy(struct kfd_process_device *pdd, uint32_t trace_id, + struct pc_sampling_entry *pcs_entry) +{ + pr_debug("free pcs_entry = %p, trace_id = 0x%x on gpu 0x%x", + pcs_entry, trace_id, pdd->dev->id); + + pdd->process->pc_sampling_ref--; + mutex_lock(&pdd->dev->pcs_data.mutex); + pdd->dev->pcs_data.hosttrap_entry.base.use_count--; + idr_remove(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_idr, trace_id); + + if (!pdd->dev->pcs_data.hosttrap_entry.base.use_count) + memset(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info, 0x0, + sizeof(struct kfd_pc_sample_info)); + mutex_unlock(&pdd->dev->pcs_data.mutex); + + kfree(pcs_entry); + + return 0; +} + +void kfd_pc_sample_release(struct kfd_process_device *pdd) +{ + struct pc_sampling_entry *pcs_entry; + struct idr *idp; + uint32_t id; + + /* force to release all PC sampling task for this process */ + idp = &pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_idr; + do { + pcs_entry = NULL; + mutex_lock(&pdd->dev->pcs_data.mutex); + idr_for_each_entry(idp, pcs_entry, id) { + if (pcs_entry->pdd != pdd) + continue; + break; + } + mutex_unlock(&pdd->dev->pcs_data.mutex); + if (pcs_entry) { + if (pcs_entry->enabled) + kfd_pc_sample_stop(pdd, pcs_entry); + kfd_pc_sample_destroy(pdd, id, pcs_entry); + } + } while (pcs_entry); +} + +int kfd_pc_sample(struct kfd_process_device *pdd, + struct kfd_ioctl_pc_sample_args __user *args) +{ + struct pc_sampling_entry *pcs_entry; + + if (args->op != KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES && + args->op != KFD_IOCTL_PCS_OP_CREATE) { + + mutex_lock(&pdd->dev->pcs_data.mutex); + pcs_entry = idr_find(&pdd->dev->pcs_data.hosttrap_entry.base.pc_sampling_idr, + args->trace_id); + mutex_unlock(&pdd->dev->pcs_data.mutex); + + /* pcs_entry is only for this pc sampling process, + * which has kfd_process->mutex protected here. + */ + if (!pcs_entry || + pcs_entry->pdd != pdd) + return -EINVAL; + } else if (pdd->process->debug_trap_enabled) { + pr_debug("Cannot have PC Sampling and debug trap simultaneously"); + return -EBUSY; + } + + switch (args->op) { + case KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES: + return kfd_pc_sample_query_cap(pdd, args); + + case KFD_IOCTL_PCS_OP_CREATE: + return kfd_pc_sample_create(pdd, args); + + case KFD_IOCTL_PCS_OP_DESTROY: + if (pcs_entry->enabled) + return -EBUSY; + else + return kfd_pc_sample_destroy(pdd, args->trace_id, pcs_entry); + + case KFD_IOCTL_PCS_OP_START: + if (pcs_entry->enabled) + return -EALREADY; + else + return kfd_pc_sample_start(pdd, pcs_entry); + + case KFD_IOCTL_PCS_OP_STOP: + if (!pcs_entry->enabled) + return -EALREADY; + else + return kfd_pc_sample_stop(pdd, pcs_entry); + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h new file mode 100644 index 0000000000000..6175563ca9bea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. 
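kfd_pc_sample_release() restarts the IDR scan on every pass because the stop/destroy work may sleep and retake pcs_data.mutex, so idr_for_each_entry() is only used, under the lock, to pick the next candidate. The generic shape of this scan-and-destroy loop, as a sketch with placeholder names:

	/* Sketch: destroying IDR entries when teardown must run unlocked. */
	static void demo_release_all(struct mutex *lock, struct idr *idr)
	{
		struct demo_entry *e;	/* placeholder entry type */
		uint32_t id;

		do {
			e = NULL;
			mutex_lock(lock);
			idr_for_each_entry(idr, e, id)
				break;		/* grab the first entry */
			mutex_unlock(lock);
			if (e)
				demo_destroy(e, id);	/* may sleep/relock */
		} while (e);
	}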
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_PC_SAMPLING_H_ +#define KFD_PC_SAMPLING_H_ + +#include "amdgpu.h" +#include "kfd_priv.h" + +int kfd_pc_sample(struct kfd_process_device *pdd, + struct kfd_ioctl_pc_sample_args __user *args); +void kfd_pc_sample_release(struct kfd_process_device *pdd); + +#endif /* KFD_PC_SAMPLING_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c new file mode 100644 index 0000000000000..ed93247d83caa --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c @@ -0,0 +1,710 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* NOTE: + * + * This file contains logic to dynamically detect and enable PeerDirect + * support. PeerDirect support is delivered e.g. as part of OFED + * from Mellanox. Because we cannot rely on the corresponding OFED + * being installed, we should: + * - copy PeerDirect definitions locally to avoid dependency on the + * corresponding header file + * - try to dynamically detect the addresses of PeerDirect function + * pointers. + * + * If dynamic detection fails, PeerDirect support can still be + * enabled using the standard PeerDirect bridge driver from: + * https://github.com/RadeonOpenCompute/ROCnRDMA + * + * + * The logic to support PeerDirect relies only on the official public API, + * to be as non-intrusive as possible.
+ * + **/ + +#include +#include +#include +#include +#include +#include + +#include "kfd_priv.h" + +/* ----------------------- PeerDirect interface ------------------------------*/ + +/* + * Copyright (c) 2013, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#define IB_PEER_MEMORY_NAME_MAX 64 +#define IB_PEER_MEMORY_VER_MAX 16 + +struct peer_memory_client { + char name[IB_PEER_MEMORY_NAME_MAX]; + char version[IB_PEER_MEMORY_VER_MAX]; + /* acquire return code: 1-mine, 0-not mine */ + int (*acquire)(unsigned long addr, size_t size, + void *peer_mem_private_data, + char *peer_mem_name, + void **client_context); + int (*get_pages)(unsigned long addr, + size_t size, int write, int force, + struct sg_table *sg_head, + void *client_context, void *core_context); + int (*dma_map)(struct sg_table *sg_head, void *client_context, + struct device *dma_device, int dmasync, int *nmap); + int (*dma_unmap)(struct sg_table *sg_head, void *client_context, + struct device *dma_device); + void (*put_pages)(struct sg_table *sg_head, void *client_context); + unsigned long (*get_page_size)(void *client_context); + void (*release)(void *client_context); + void* (*get_context_private_data)(u64 peer_id); + void (*put_context_private_data)(void *context); +}; + +typedef int (*invalidate_peer_memory)(void *reg_handle, + void *core_context); + +void *ib_register_peer_memory_client(struct peer_memory_client *peer_client, + invalidate_peer_memory *invalidate_callback); +void ib_unregister_peer_memory_client(void *reg_handle); + + +/*------------------- PeerDirect bridge driver ------------------------------*/ + +#define AMD_PEER_BRIDGE_DRIVER_VERSION "1.0" +#define AMD_PEER_BRIDGE_DRIVER_NAME "amdkfd" + +static char rdma_name[] = "AMD RDMA"; + + +static void* (*pfn_ib_register_peer_memory_client)(struct peer_memory_client + *peer_client, + invalidate_peer_memory + *invalidate_callback); + +static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle); + +static void *ib_reg_handle; + +struct amd_mem_context { + uint64_t va; + uint64_t size; + unsigned long offset; + struct amdgpu_bo *bo; + struct kfd_node *dev; + + struct sg_table *pages; + struct device *dma_dev; + + + /* Context received from PeerDirect 
call */ + void *core_context; + + pid_t pid; + uint32_t flags; +}; + +/* Workaround: Mellanox peerdirect driver expects sg lists at + * page granularity. This causes failures when an application tries + * to register size < PAGE_SIZE or addr starts at some offset. Fix + * it by aligning the size to page size and addr to page boundary. + */ +static void align_addr_size(unsigned long *addr, size_t *size) +{ + unsigned long end = ALIGN(*addr + *size, PAGE_SIZE); + + *addr = ALIGN_DOWN(*addr, PAGE_SIZE); + *size = end - *addr; +} + +static int amd_acquire(unsigned long addr, size_t size, + void *peer_mem_private_data, + char *peer_mem_name, void **client_context) +{ + struct kfd_process *p; + struct kfd_bo *buf_obj; + struct amd_mem_context *mem_context; + + if (peer_mem_name == rdma_name) { + p = peer_mem_private_data; + } else { + p = kfd_get_process(current); + if (IS_ERR(p)) { + pr_debug("Not a KFD process\n"); + return 0; + } + } + + align_addr_size(&addr, &size); + + mutex_lock(&p->mutex); + buf_obj = kfd_process_find_bo_from_interval(p, addr, + addr + size - 1); + if (!buf_obj) { + pr_debug("Cannot find a kfd_bo for the range\n"); + goto out_unlock; + } + + /* Initialize context used for operation with given address */ + mem_context = kzalloc(sizeof(*mem_context), GFP_KERNEL); + if (!mem_context) + goto out_unlock; + + mem_context->pid = p->lead_thread->pid; + + pr_debug("addr: %#lx, size: %#lx, pid: %d\n", + addr, size, mem_context->pid); + + mem_context->va = addr; + mem_context->size = size; + mem_context->offset = addr - buf_obj->it.start; + + mem_context->bo = amdgpu_amdkfd_gpuvm_get_bo_ref(buf_obj->mem, + &mem_context->flags); + mem_context->dev = buf_obj->dev; + + mutex_unlock(&p->mutex); + + pr_debug("Client context: 0x%p\n", mem_context); + + /* Return pointer to allocated context */ + *client_context = mem_context; + + /* Return 1 to inform that this address which will be handled + * by AMD GPU driver + */ + return 1; + +out_unlock: + mutex_unlock(&p->mutex); + return 0; +} + +static int amd_get_pages(unsigned long addr, size_t size, int write, int force, + struct sg_table *sg_head, + void *client_context, void *core_context) +{ + int ret; + struct amd_mem_context *mem_context = + (struct amd_mem_context *)client_context; + + align_addr_size(&addr, &size); + + pr_debug("addr: %#lx, size: %#lx, core_context: 0x%p\n", + addr, size, core_context); + + if (!mem_context || !mem_context->bo || !mem_context->dev) { + pr_warn("Invalid client context"); + return -EINVAL; + } + + pr_debug("pid: %d\n", mem_context->pid); + + if (addr != mem_context->va) { + pr_warn("Context address (%#llx) is not the same\n", + mem_context->va); + return -EINVAL; + } + + if (size != mem_context->size) { + pr_warn("Context size (%#llx) is not the same\n", + mem_context->size); + return -EINVAL; + } + + ret = amdgpu_amdkfd_gpuvm_pin_bo(mem_context->bo, + mem_context->bo->kfd_bo->domain); + if (ret) { + pr_err("Pinning of buffer failed.\n"); + return ret; + } + + /* Mark the device as active */ + kfd_inc_compute_active(mem_context->dev); + + mem_context->core_context = core_context; + + return 0; +} + + +static int amd_dma_map(struct sg_table *sg_head, void *client_context, + struct device *dma_device, int dmasync, int *nmap) +{ + struct sg_table *sg_table_tmp; + struct scatterlist *sg; + int ret; + int i; + + /* + * NOTE/TODO: + * We could have potentially three cases for real memory + * location: + * - all memory in the local + * - all memory in the system (RAM) + * - memory is spread (s/g) between local 
and system. + * + * In the case of all memory in the system we could use the + * iommu driver to build DMA addresses, but not in the case + * of local memory, because currently the iommu driver doesn't + * deal with local/device memory addresses (it requires "struct + * page"). + * + * Accordingly, this code assumes that iommu functionality + * is disabled, so the sg_table already + * contains DMA addresses. + * + */ + struct amd_mem_context *mem_context = + (struct amd_mem_context *)client_context; + + pr_debug("Client context: 0x%p, sg_head: 0x%p\n", + client_context, sg_head); + + if (!mem_context || !mem_context->bo) { + pr_warn("Invalid client context"); + return -EINVAL; + } + + pr_debug("pid: %d, address: %#llx, size: %#llx\n", + mem_context->pid, + mem_context->va, + mem_context->size); + + /* Build sg_table for buffer being exported, including DMA mapping */ + ret = amdgpu_amdkfd_gpuvm_get_sg_table( + mem_context->dev->adev, mem_context->bo, mem_context->flags, + mem_context->offset, mem_context->size, + dma_device, DMA_BIDIRECTIONAL, &sg_table_tmp); + if (ret) { + pr_err("Building of sg_table failed\n"); + return ret; + } + + pr_debug("size 0x%llx nents %d\n", mem_context->size, sg_table_tmp->nents); + for_each_sgtable_sg(sg_table_tmp, sg, i) + pr_debug("segment_%d dma_address 0x%llx length 0x%x dma_length 0x%x\n", + i, sg->dma_address, sg->length, sg->dma_length); + + /* Maintain a copy of the handle to sg_table */ + mem_context->pages = sg_table_tmp; + mem_context->dma_dev = dma_device; + + /* Copy information about previously allocated sg_table */ + *sg_head = *mem_context->pages; + + /* Return number of sg table segments */ + *nmap = mem_context->pages->nents; + + return ret; +} + +static int amd_dma_unmap(struct sg_table *sg_head, void *client_context, + struct device *dma_device) +{ + struct amd_mem_context *mem_context = + (struct amd_mem_context *)client_context; + + pr_debug("Client context: 0x%p, sg_table: 0x%p\n", + client_context, sg_head); + + if (!mem_context || !mem_context->bo) { + pr_warn("Invalid client context"); + return -EINVAL; + } + + pr_debug("pid: %d, address: %#llx, size: %#llx\n", + mem_context->pid, + mem_context->va, + mem_context->size); + + /* Release the mapped pages of buffer */ + amdgpu_amdkfd_gpuvm_put_sg_table(mem_context->bo, + mem_context->dma_dev, + DMA_BIDIRECTIONAL, + mem_context->pages); + mem_context->pages = NULL; + + return 0; +} + +static void amd_put_pages(struct sg_table *sg_head, void *client_context) +{ + struct amd_mem_context *mem_context = + (struct amd_mem_context *)client_context; + + pr_debug("Client context: 0x%p, sg_head: 0x%p\n", + client_context, sg_head); + pr_debug("pid: %d, address: %#llx, size: %#llx\n", + mem_context->pid, + mem_context->va, + mem_context->size); + + amdgpu_amdkfd_gpuvm_unpin_bo(mem_context->bo); + kfd_dec_compute_active(mem_context->dev); +} + +static unsigned long amd_get_page_size(void *client_context) +{ + return PAGE_SIZE; +} + +static void amd_release(void *client_context) +{ + struct amd_mem_context *mem_context = + (struct amd_mem_context *)client_context; + + pr_debug("Client context: 0x%p\n", client_context); + pr_debug("pid: %d, address: %#llx, size: %#llx\n", + mem_context->pid, + mem_context->va, + mem_context->size); + + amdgpu_amdkfd_gpuvm_put_bo_ref(mem_context->bo); + + kfree(mem_context); +} + + +static struct peer_memory_client amd_mem_client = { + .acquire = amd_acquire, + .get_pages = amd_get_pages, + .dma_map = amd_dma_map, + .dma_unmap = amd_dma_unmap, + .put_pages = 
amd_put_pages, + .get_page_size = amd_get_page_size, + .release = amd_release, + .get_context_private_data = NULL, + .put_context_private_data = NULL, +}; + +/** Initialize PeerDirect interface with RDMA Network stack. + * + * Because network stack could potentially be loaded later we check + * presence of PeerDirect when HSA process is created. If PeerDirect was + * already initialized we do nothing otherwise try to detect and register. + */ +void kfd_init_peer_direct(void) +{ + if (pfn_ib_unregister_peer_memory_client) { + pr_debug("PeerDirect support was already initialized\n"); + return; + } + + pr_debug("Try to initialize PeerDirect support\n"); + + pfn_ib_register_peer_memory_client = + (void *(*)(struct peer_memory_client *, + invalidate_peer_memory *)) + symbol_request(ib_register_peer_memory_client); + + pfn_ib_unregister_peer_memory_client = (void (*)(void *)) + symbol_request(ib_unregister_peer_memory_client); + + if (!pfn_ib_register_peer_memory_client || + !pfn_ib_unregister_peer_memory_client) { + pr_debug("PeerDirect interface was not detected\n"); + /* Do cleanup */ + kfd_close_peer_direct(); + return; + } + + strcpy(amd_mem_client.name, AMD_PEER_BRIDGE_DRIVER_NAME); + strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION); + + ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client, NULL); + + if (!ib_reg_handle) { + pr_err("Cannot register peer memory client\n"); + /* Do cleanup */ + kfd_close_peer_direct(); + return; + } + + pr_info("PeerDirect support was initialized successfully\n"); +} + +/** + * Close connection with PeerDirect interface with RDMA Network stack. + * + */ +void kfd_close_peer_direct(void) +{ + if (pfn_ib_unregister_peer_memory_client) { + if (ib_reg_handle) + pfn_ib_unregister_peer_memory_client(ib_reg_handle); + + symbol_put(ib_unregister_peer_memory_client); + } + + if (pfn_ib_register_peer_memory_client) + symbol_put(ib_register_peer_memory_client); + + + /* Reset pointers to be safe */ + pfn_ib_unregister_peer_memory_client = NULL; + pfn_ib_register_peer_memory_client = NULL; + ib_reg_handle = NULL; +} + +/* ------------------------- AMD RDMA wrapper --------------------------------*/ + +#include "drm/amd_rdma.h" + +struct rdma_p2p_data { + struct amd_p2p_info p2p_info; + void (*free_callback)(void *client_priv); + void *client_priv; +}; + +/** + * This function makes the pages underlying a range of GPU virtual memory + * accessible for DMA operations from another PCIe device + * + * \param address - The start address in the Unified Virtual Address + * space in the specified process + * \param length - The length of requested mapping + * \param pid - Pointer to structure pid to which address belongs. + * Could be NULL for current process address space. + * \param p2p_data - On return: Pointer to structure describing + * underlying pages/locations + * \param free_callback - Pointer to callback which will be called when access + * to such memory must be stopped immediately: Memory + * was freed, GECC events, etc. + * Client should immediately stop any transfer + * operations and returned as soon as possible. + * After return all resources associated with address + * will be release and no access will be allowed. 
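kfd_init_peer_direct() binds to the PeerDirect entry points through symbol_request(), which resolves an exported symbol only if the exporting module is available (optionally triggering a module load) and pins that module; symbol_put() drops the pin. The idiom in isolation, sketched against a placeholder export:

	/* Weak binding to an optional module's export (placeholder symbol). */
	extern int optional_feature_call(int arg);

	static int (*pfn_feature)(int);

	pfn_feature = symbol_request(optional_feature_call);
	if (pfn_feature) {		/* NULL: provider not available */
		pfn_feature(42);
		symbol_put(optional_feature_call);
		pfn_feature = NULL;
	}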
+ * \param client_priv - Pointer to be passed as a parameter to + * 'free_callback' + * + * \return 0 if operation was successful + */ +static int rdma_get_pages(uint64_t address, uint64_t length, struct pid *pid, + struct device *dma_dev, + struct amd_p2p_info **amd_p2p_data, + void (*free_callback)(void *client_priv), + void *client_priv) +{ + struct rdma_p2p_data *p2p_data; + struct kfd_process *p; + struct sg_table sg_head; + struct amd_mem_context *mem_context; + int nmap; + int r; + + p2p_data = kzalloc(sizeof(*p2p_data), GFP_KERNEL); + if (!p2p_data) + return -ENOMEM; + + p = kfd_lookup_process_by_pid(pid); + if (!p) { + pr_debug("pid lookup failed\n"); + r = -ESRCH; + goto err_lookup_process; + } + + r = amd_acquire(address, length, p, rdma_name, (void **)&mem_context); + kfd_unref_process(p); + if (r == 0) { + pr_debug("acquire failed: %d\n", r); + goto err_acquire; + } + + r = amd_get_pages(address, length, 1, 0, &sg_head, + mem_context, p2p_data); + if (r) { + pr_debug("get_pages failed: %d\n", r); + goto err_get_pages; + } + + r = amd_dma_map(&sg_head, mem_context, dma_dev, 0, &nmap); + if (r) { + pr_debug("dma_map failed: %d\n", r); + goto err_dma_map; + } + + + p2p_data->free_callback = free_callback; + p2p_data->client_priv = client_priv; + p2p_data->p2p_info.va = address; + p2p_data->p2p_info.size = length; + p2p_data->p2p_info.pid = pid; + p2p_data->p2p_info.pages = mem_context->pages; + p2p_data->p2p_info.priv = mem_context; + + *amd_p2p_data = &p2p_data->p2p_info; + + return 0; + +err_dma_map: + amd_put_pages(&sg_head, mem_context); +err_get_pages: + amd_release(mem_context); +err_acquire: +err_lookup_process: + kfree(p2p_data); + + return r; +} + +/** + * + * This function releases resources previously allocated by a get_pages() call. + * + * \param p_p2p_data - A pointer to a pointer to amd_p2p_info entries + * allocated by a get_pages() call. + * + * \return 0 if operation was successful + */ +static int rdma_put_pages(struct amd_p2p_info **p_p2p_data) +{ + struct rdma_p2p_data *p2p_data = + container_of(*p_p2p_data, struct rdma_p2p_data, p2p_info); + int r; + + r = amd_dma_unmap(p2p_data->p2p_info.pages, + p2p_data->p2p_info.priv, + NULL); + if (r) + return r; + amd_put_pages(p2p_data->p2p_info.pages, + p2p_data->p2p_info.priv); + amd_release(p2p_data->p2p_info.priv); + kfree(p2p_data); + + *p_p2p_data = NULL; + + return 0; +} + +/** + * Check if a given address belongs to the GPU address space. + * + * \param address - Address to check + * \param pid - Process to which the given address belongs. + * Can be NULL for the current process. + * + * \return 0 - This is not a GPU address managed by the AMD driver + * 1 - This is a GPU address managed by the AMD driver + */ +static int rdma_is_gpu_address(uint64_t address, struct pid *pid) +{ + struct kfd_bo *buf_obj; + struct kfd_process *p; + + p = kfd_lookup_process_by_pid(pid); + if (!p) { + pr_debug("Could not find the process\n"); + return 0; + } + + buf_obj = kfd_process_find_bo_from_interval(p, address, address); + + kfd_unref_process(p); + if (!buf_obj) + return 0; + + return 1; +} + +/** + * Return the single page size to be used when building the scatter/gather + * table for the given range. + * + * \param address - Address + * \param length - Range length + * \param pid - Process id structure. Can be NULL for the current process. 
+ * \param page_size - On return: Page size + * + * \return 0 if operation was successful + */ +static int rdma_get_page_size(uint64_t address, uint64_t length, + struct pid *pid, unsigned long *page_size) +{ + /* + * As local memory is always physically contiguous, the local + * memory page size can be chosen arbitrarily. + * Currently we assume the local memory page size to be the same + * as for system memory, which is 4KB. + */ + *page_size = PAGE_SIZE; + + return 0; +} + +/** + * Singleton object: rdma interface function pointers + */ +static const struct amd_rdma_interface rdma_ops = { + .get_pages = rdma_get_pages, + .put_pages = rdma_put_pages, + .is_gpu_address = rdma_is_gpu_address, + .get_page_size = rdma_get_page_size +}; + +/** + * amdkfd_query_rdma_interface - Return the interface (function pointer table) + * for the rdma interface + * + * + * \param ops - OUT: Pointer to the interface + * + * \return 0 if operation was successful. + */ +int amdkfd_query_rdma_interface(const struct amd_rdma_interface **ops) +{ + *ops = &rdma_ops; + + return 0; +} +EXPORT_SYMBOL(amdkfd_query_rdma_interface); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 8b6b2bd5c148f..cd8611401a664 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -145,8 +145,9 @@ struct pm4_mes_map_process { union { struct { - uint32_t pasid:16; - uint32_t reserved1:2; + uint32_t pasid:16; /* 0 - 15 */ + uint32_t reserved1:1; /* 16 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; uint32_t new_debug:1; uint32_t reserved2:1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h index 38f5cb6a222ab..e0ed62c4ade04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran { struct { uint32_t pasid:16; /* 0 - 15 */ uint32_t single_memops:1; /* 16 */ - uint32_t reserved1:1; /* 17 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; /* 18 - 21 */ uint32_t new_debug:1; /* 22 */ uint32_t tmz:1; /* 23 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f7c12d4f0abb9..670e6442dc692 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -38,14 +38,19 @@ #include #include #include -#include +#include +#include +/* amdkcl: this header file is included in kcl_device_cgroup.h +#include */ #include #include #include #include #include #include +#include +#include "amdgpu_amdkfd.h" #include "amd_shared.h" #include "amdgpu.h" @@ -144,6 +149,19 @@ enum kfd_ioctl_flags { * we also allow ioctls with SYS_ADMIN capability. */ KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0), + + /* + * @KFD_IOC_FLAG_PERFMON: + * Performance monitoring feature. GPU performance monitoring can allow users + * to gather some information about other processes. PC sampling can allow + * users to infer information about wavefronts from other processes that are + * running on the same CUs, such as which execution units they are using. As + * such, this type of performance monitoring should be protected and only + * available to users with sufficient capabilities: either CAP_PERFMON, or, + * for backwards compatibility, CAP_SYS_ADMIN.
+ */ + + KFD_IOC_FLAG_PERFMON = BIT(1), }; /* * Kernel module parameter to specify maximum number of supported queues per @@ -178,6 +196,11 @@ extern int debug_largebar; /* Set sh_mem_config.retry_disable on GFX v9 */ extern int amdgpu_noretry; +/* + * Enable privileged mode for all CP queues including user queues + */ +extern int priv_cp_queues; + /* Halt if HWS hang is detected */ extern int halt_if_hws_hang; @@ -192,6 +215,11 @@ extern int queue_preemption_timeout_ms; */ extern int amdgpu_no_queue_eviction_on_vm_fault; +/* + * Restore evicted process only if queues are active + */ +extern bool keep_idle_process_evicted; + /* Enable eviction debug messages */ extern bool debug_evictions; @@ -257,6 +285,26 @@ struct kfd_vmid_info { struct kfd_dev; +struct kfd_dev_pc_sampling_data { + uint32_t use_count; /* Num of PC sampling sessions */ + uint32_t active_count; /* Num of active sessions */ + uint32_t target_simd; /* target simd for trap */ + uint32_t target_wave_slot; /* target wave slot for trap */ + struct idr pc_sampling_idr; + struct task_struct *pc_sample_thread; + struct kfd_pc_sample_info pc_sample_info; +}; + +struct kfd_dev_pcs_hosttrap { + struct kfd_dev_pc_sampling_data base; +}; + +/* Per device PC Sampling data */ +struct kfd_dev_pc_sampling { + struct mutex mutex; + struct kfd_dev_pcs_hosttrap hosttrap_entry; +}; + struct kfd_node { unsigned int node_id; struct amdgpu_device *adev; /* Duplicated here along with keeping @@ -314,6 +362,8 @@ struct kfd_node { /* Track per device allocated watch points */ uint32_t alloc_watch_ids; spinlock_t watch_points_lock; + + struct kfd_dev_pc_sampling pcs_data; }; struct kfd_dev { @@ -358,6 +408,9 @@ struct kfd_dev { /* Compute Profile ref. count */ atomic_t compute_profile; + /*spm process id */ + unsigned int spm_pasid; + struct ida doorbell_ida; unsigned int max_doorbell_slices; @@ -373,6 +426,17 @@ struct kfd_dev { unsigned long *doorbell_bitmap; }; +struct kfd_ipc_obj; + +struct kfd_bo { + void *mem; + struct interval_tree_node it; + struct kfd_node *dev; + /* page-aligned VA address */ + uint64_t cpuva; + unsigned int mem_type; +}; + enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, @@ -608,6 +672,9 @@ struct queue { /* procfs */ struct kobject kobj; + struct attribute attr_gpuid; + struct attribute attr_size; + struct attribute attr_type; void *gang_ctx_bo; uint64_t gang_ctx_gpu_addr; @@ -703,6 +770,17 @@ struct qcm_process_device { /* bitmap for dynamic doorbell allocation from the bo */ unsigned long *doorbell_bitmap; + /* doorbell user mmap vma */ + struct vm_area_struct *doorbell_vma; + /* lock to serialize doorbell unmap and remap */ + struct mutex doorbell_lock; + + /* Indicate if doorbell is mapped or unmapped + * -1 means doorbells need to be unmapped because queue is evicted + * 0 means doorbells are unmapped + * 1 means doorbells are mapped + */ + int doorbell_mapped; }; /* KFD Memory Eviction */ @@ -714,6 +792,9 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 +void kfd_process_schedule_restore(struct kfd_process *p); +int kfd_process_remap_doorbells_locked(struct kfd_process *p); + /* 8 byte handle containing GPU ID in the most significant 4 bytes and * idr_handle in the least significant 4 bytes */ @@ -737,6 +818,11 @@ enum kfd_pdd_bound { */ #define SDMA_ACTIVITY_DIVISOR 100 +struct pc_sampling_entry { + bool enabled; + struct kfd_process_device *pdd; +}; + /* Data that is per-process-per device. 
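KFD_IOC_FLAG_PERFMON marks ioctls whose results could leak information across processes, so the dispatcher is expected to gate them on CAP_PERFMON with a CAP_SYS_ADMIN fallback. A hypothetical check of that shape (the kernel's perfmon_capable() helper encapsulates exactly this fallback):

	/* Hypothetical gate for PERFMON-flagged ioctls. */
	if ((ioctl_flags & KFD_IOC_FLAG_PERFMON) &&
	    !capable(CAP_PERFMON) && !capable(CAP_SYS_ADMIN))
		return -EACCES;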
 */
struct kfd_process_device {
	/* The device that owns this data. */
@@ -775,7 +861,7 @@
 	enum kfd_pdd_bound bound;

 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
@@ -784,6 +870,14 @@
 	struct attribute attr_sdma;
 	char sdma_filename[MAX_SYSFS_FILENAME_LEN];

+	/* spm data */
+	struct kfd_spm_cntr *spm_cntr;
+	struct mutex spm_mutex;
+	struct work_struct spm_work;
+	spinlock_t spm_irq_lock;
+	/* reserve space to fix spm overflow */
+	u32 spm_overflow_reserved;
+
 	/* Eviction activity tracking */
 	uint64_t last_evict_timestamp;
 	atomic64_t evict_duration_counter;
@@ -855,7 +949,9 @@

 struct svm_range_list {
 	struct mutex lock;
+#ifdef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED
 	struct rb_root_cached objects;
+#endif
 	struct list_head list;
 	struct work_struct deferred_list_work;
 	struct list_head deferred_range_list;
@@ -866,6 +962,14 @@
 	struct delayed_work restore_work;
 	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
 	struct task_struct *faulting_task;
+	/* checkpoint ts decides if page fault recovery needs to be dropped */
+	uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+	/* Default granularity to use in buffer migration
+	 * and restoration of backing memory while handling
+	 * recoverable page faults
+	 */
+	uint8_t default_granularity;
 };

 /* Process data */
@@ -900,6 +1004,10 @@
 	/* We want to receive a notification when the mm_struct is destroyed */
 	struct mmu_notifier mmu_notifier;

+#ifndef HAVE_MMU_NOTIFIER_PUT
+	/* Used for delayed freeing of the kfd_process structure */
+	struct rcu_head rcu;
+#endif
 	u32 pasid;

 	/*
@@ -925,13 +1033,19 @@
 	size_t signal_event_count;
 	bool signal_event_limit_reached;

+#ifndef HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED
+	struct rb_root bo_interval_tree;
+#else
+	struct rb_root_cached bo_interval_tree;
+#endif
+
 	/* Information used for memory eviction */
 	void *kgd_process_info;
 	/* Eviction fence that is attached to all the BOs of this process. The
 	 * fence will be triggered during eviction and new one will be created
 	 * during restore
 	 */
-	struct dma_fence __rcu *ef;
+	struct dma_fence *ef;

 	/* Work items for evicting and restoring BOs */
 	struct delayed_work eviction_work;
@@ -942,6 +1056,7 @@
 	 * restored after an eviction
 	 */
 	unsigned long last_restore_timestamp;
+	unsigned long last_evict_timestamp;

 	/* Indicates device process is debug attached with reserved vmid.
*/ bool debug_trap_enabled; @@ -994,6 +1109,9 @@ struct kfd_process { struct semaphore runtime_enable_sema; bool is_runtime_retry; struct kfd_runtime_info runtime_info; + + /* Indicates process' PC Sampling ref cnt*/ + uint32_t pc_sampling_ref; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -1042,9 +1160,9 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx( } void kfd_unref_process(struct kfd_process *p); -int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger); +int kfd_process_evict_queues(struct kfd_process *p, bool force, uint32_t trigger); int kfd_process_restore_queues(struct kfd_process *p); -void kfd_suspend_all_processes(void); +void kfd_suspend_all_processes(bool force); int kfd_resume_all_processes(void); struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process, @@ -1068,13 +1186,29 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, /* KFD process API for creating and translating handles */ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, - void *mem); + void *mem, uint64_t start, + uint64_t length, uint64_t cpuva, + unsigned int mem_type, + int preferred_id); void *kfd_process_device_translate_handle(struct kfd_process_device *p, int handle); +struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd, + int handle); +void *kfd_process_find_bo_from_interval(struct kfd_process *p, + uint64_t start_addr, + uint64_t last_addr); void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, int handle); struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid); +/* Process device data iterator */ +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p); +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, + struct kfd_process_device *pdd); +bool kfd_has_process_device_data(struct kfd_process *p); + /* PASIDs */ int kfd_pasid_init(void); void kfd_pasid_exit(void); @@ -1089,6 +1223,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process, struct vm_area_struct *vma); +void kfd_doorbell_unmap(struct kfd_process_device *pdd); +int kfd_doorbell_remap(struct kfd_process_device *pdd); void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); @@ -1177,6 +1313,8 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, uint64_t tma_addr); void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd, bool enabled); +void kfd_process_set_trap_pc_sampling_flag(struct qcm_process_device *qpd, + enum kfd_ioctl_pc_sample_method method, bool enabled); /* CWSR initialization */ int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep); @@ -1209,6 +1347,11 @@ struct kfd_criu_bo_priv_data { uint64_t user_addr; uint32_t idr_handle; uint32_t mapped_gpuids[MAX_GPU_INSTANCE]; + + /* IPC related variables */ + uint32_t is_imported; + uint32_t ipc_flags; + uint32_t ipc_share_handle[4]; }; /* @@ -1324,6 +1467,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id); /* Process Queue Manager */ 
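/* The process queue manager (PQM) is the per-process front end for queue
 * management: it hands out process-unique queue IDs and forwards queue
 * create/update/destroy requests to the device queue manager (DQM) of the
 * GPU that backs each queue.
 */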
struct process_queue_node {
@@ -1338,7 +1482,6 @@
 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
 void pqm_uninit(struct process_queue_manager *pqm);
 int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_node *dev,
-			    struct file *f,
 			    struct queue_properties *properties,
 			    unsigned int *qid,
 			    const struct kfd_criu_queue_priv_data *q_data,
@@ -1507,6 +1650,17 @@ int kfd_send_exception_to_runtime(struct kfd_process *p,
 			uint64_t error_reason);
 bool kfd_is_locked(void);

+void kfd_spm_init_process_device(struct kfd_process_device *pdd);
+int kfd_release_spm(struct kfd_process_device *pdd, struct amdgpu_device *adev);
+int kfd_rlc_spm(struct kfd_process *p, void __user *data);
+
+/* PeerDirect support */
+void kfd_init_peer_direct(void);
+void kfd_close_peer_direct(void);
+
+/* IPC Support */
+int kfd_ipc_init(void);
+
 /* Compute profile */
 void kfd_inc_compute_active(struct kfd_node *dev);
 void kfd_dec_compute_active(struct kfd_node *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902950cc0601..8ae011de71347 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -41,8 +41,10 @@ struct mm_struct;
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_svm.h"
+#include "kfd_trace.h"
 #include "kfd_smi_events.h"
 #include "kfd_debug.h"
+#include "kfd_pc_sampling.h"

 /*
  * List of struct kfd_process (field kfd_process).
@@ -73,6 +75,8 @@ static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);

 static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
+static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
+				 char *name);

 struct kfd_procfs_tree {
 	struct kobject *kobj;
@@ -270,6 +274,11 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
 	struct kfd_node *dev = NULL;
 	struct kfd_process *proc = NULL;
 	struct kfd_process_device *pdd = NULL;
+	int i;
+	struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
+	u32 queue_format;
+
+	memset(cu_occupancy, 0x0, sizeof(cu_occupancy));

 	pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
 	dev = pdd->dev;
@@ -287,8 +296,29 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
 	/* Collect wave count from device if it supports */
 	wave_cnt = 0;
 	max_waves_per_cu = 0;
-	dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
-			&max_waves_per_cu, 0);
+
+	/*
+	 * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the
+	 * partition. For AQL queues, because of cooperative dispatch we
+	 * multiply the wave count by the number of XCCs in the partition to
+	 * get the total wave count across all XCCs in the partition.
+	 * For PM4 queues, there is no cooperative dispatch, so wave_cnt
+	 * stays as it is.
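+	 * For example, on a partition with 4 XCCs, an AQL queue reporting
+	 * 10 waves contributes 4 * 10 = 40 waves to the total, while a PM4
+	 * queue reporting 10 waves contributes just 10.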
+ */ + dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, + &max_waves_per_cu, ffs(dev->xcc_mask) - 1); + + for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { + if (cu_occupancy[i].wave_cnt != 0 && + kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, + cu_occupancy[i].doorbell_off, + &queue_format)) { + if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4)) + wave_cnt += cu_occupancy[i].wave_cnt; + else + wave_cnt += (NUM_XCC(dev->xcc_mask) * + cu_occupancy[i].wave_cnt); + } + } /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; @@ -306,7 +336,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, } else if (strncmp(attr->name, "vram_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_vram); - return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); + return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage)); } else if (strncmp(attr->name, "sdma_", 5) == 0) { struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, attr_sdma); @@ -440,36 +470,12 @@ static ssize_t kfd_sysfs_counters_show(struct kobject *kobj, return 0; } -static struct attribute attr_queue_size = { - .name = "size", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute attr_queue_type = { - .name = "type", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute attr_queue_gpuid = { - .name = "gpuid", - .mode = KFD_SYSFS_FILE_MODE -}; - -static struct attribute *procfs_queue_attrs[] = { - &attr_queue_size, - &attr_queue_type, - &attr_queue_gpuid, - NULL -}; -ATTRIBUTE_GROUPS(procfs_queue); - static const struct sysfs_ops procfs_queue_ops = { .show = kfd_procfs_queue_show, }; static const struct kobj_type procfs_queue_type = { .sysfs_ops = &procfs_queue_ops, - .default_groups = procfs_queue_groups, }; static const struct sysfs_ops procfs_stats_ops = { @@ -511,6 +517,10 @@ int kfd_procfs_add_queue(struct queue *q) return ret; } + kfd_sysfs_create_file(&q->kobj, &q->attr_gpuid, "gpuid"); + kfd_sysfs_create_file(&q->kobj, &q->attr_size, "size"); + kfd_sysfs_create_file(&q->kobj, &q->attr_type, "type"); + return 0; } @@ -655,6 +665,10 @@ void kfd_procfs_del_queue(struct queue *q) if (!q) return; + sysfs_remove_file(&q->kobj, &q->attr_gpuid); + sysfs_remove_file(&q->kobj, &q->attr_size); + sysfs_remove_file(&q->kobj, &q->attr_type); + kobject_del(&q->kobj); kobject_put(&q->kobj); } @@ -955,7 +969,7 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid) static void kfd_process_device_free_bos(struct kfd_process_device *pdd) { struct kfd_process *p = pdd->process; - void *mem; + struct kfd_bo *buf_obj; int id; int i; @@ -963,7 +977,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) * Remove all handles from idr and release appropriate * local memory object */ - idr_for_each_entry(&pdd->alloc_idr, mem, id) { + idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) { for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *peer_pdd = p->pdds[i]; @@ -971,11 +985,11 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) if (!peer_pdd->drm_priv) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - peer_pdd->dev->adev, mem, peer_pdd->drm_priv); + peer_pdd->dev->adev, buf_obj->mem, peer_pdd->drm_priv); } - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, - pdd->drm_priv, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, + buf_obj->mem, pdd->drm_priv, NULL); 
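+		/* Dropping the handle below also removes the BO's
+		 * interval-tree entry and frees its kfd_bo wrapper.
+		 */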
kfd_process_device_remove_obj_handle(pdd, id); } } @@ -1029,6 +1043,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", pdd->dev->id, p->pasid); + kfd_pc_sample_release(pdd); + kfd_release_spm(pdd, pdd->dev->adev); + kfd_process_device_destroy_cwsr_dgpu(pdd); kfd_process_device_destroy_ib_mem(pdd); @@ -1043,6 +1060,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) get_order(KFD_CWSR_TBA_TMA_SIZE)); idr_destroy(&pdd->alloc_idr); + mutex_destroy(&pdd->qpd.doorbell_lock); kfd_free_process_doorbells(pdd->dev->kfd, pdd); @@ -1118,7 +1136,6 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - struct dma_fence *ef; kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); @@ -1127,9 +1144,7 @@ static void kfd_process_wq_release(struct work_struct *work) * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. */ - synchronize_rcu(); - ef = rcu_access_pointer(p->ef); - dma_fence_signal(ef); + dma_fence_signal(p->ef); kfd_process_remove_sysfs(p); @@ -1138,7 +1153,7 @@ static void kfd_process_wq_release(struct work_struct *work) svm_range_list_fini(p); kfd_process_destroy_pdds(p); - dma_fence_put(ef); + dma_fence_put(p->ef); kfd_event_free_process(p); @@ -1158,6 +1173,7 @@ static void kfd_process_ref_release(struct kref *ref) queue_work(kfd_process_wq, &p->release_work); } +#ifdef HAVE_MMU_NOTIFIER_PUT static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm) { int idx = srcu_read_lock(&kfd_processes_srcu); @@ -1172,10 +1188,21 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) { kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } +#else +static void kfd_process_destroy_delayed(struct rcu_head *rcu) +{ + struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); + + kfd_unref_process(p); +} +#endif static void kfd_process_notifier_release_internal(struct kfd_process *p) { int i; +#ifndef HAVE_MMU_NOTIFIER_PUT + struct mm_struct *mm = p->mm; +#endif cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); @@ -1210,7 +1237,12 @@ static void kfd_process_notifier_release_internal(struct kfd_process *p) srcu_read_unlock(&kfd_processes_srcu, idx); } +#ifdef HAVE_MMU_NOTIFIER_PUT mmu_notifier_put(&p->mmu_notifier); +#else + mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); + mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); +#endif } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -1247,8 +1279,10 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, +#ifdef HAVE_MMU_NOTIFIER_PUT .alloc_notifier = kfd_process_alloc_notifier, .free_notifier = kfd_process_free_notifier, +#endif }; /* @@ -1452,13 +1486,36 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) return true; } +/* bit offset in 1st-level TMA's 2nd byte which used for KFD_TRAP_TYPE_BIT */ +enum KFD_TRAP_TYPE_BIT { + KFD_TRAP_TYPE_DEBUG = 0, /* bit 0 for debug trap */ + KFD_TRAP_TYPE_HOST, + KFD_TRAP_TYPE_STOCHASTIC, +}; + void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd, bool enabled) { if (qpd->cwsr_kaddr) { - uint64_t *tma = - (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); - tma[2] = enabled; + volatile 
unsigned long *tma = + (volatile unsigned long *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + if (enabled) + set_bit(KFD_TRAP_TYPE_DEBUG, &tma[2]); + else + clear_bit(KFD_TRAP_TYPE_DEBUG, &tma[2]); + } +} + +void kfd_process_set_trap_pc_sampling_flag(struct qcm_process_device *qpd, + enum kfd_ioctl_pc_sample_method method, bool enabled) +{ + if (qpd->cwsr_kaddr) { + volatile unsigned long *tma = + (volatile unsigned long *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + if (enabled) + set_bit(method, &tma[2]); + else + clear_bit(method, &tma[2]); } } @@ -1469,7 +1526,9 @@ void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd, static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; +#ifdef HAVE_MMU_NOTIFIER_PUT struct mmu_notifier *mn; +#endif int err = -ENOMEM; process = kzalloc(sizeof(*process), GFP_KERNEL); @@ -1526,6 +1585,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) */ kref_get(&process->ref); +#ifdef HAVE_MMU_NOTIFIER_PUT /* MMU notifier registration must be the last call that can fail * because after this point we cannot unwind the process creation. * After this point, mmu_notifier_put will trigger the cleanup by @@ -1537,10 +1597,21 @@ static struct kfd_process *create_process(const struct task_struct *thread) goto err_register_notifier; } BUG_ON(mn != &process->mmu_notifier); +#else + /* Must be last, have to use release destruction after this */ + process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; + err = mmu_notifier_register(&process->mmu_notifier, process->mm); + if (err) + goto err_register_notifier; +#endif kfd_unref_process(process); get_task_struct(process->lead_thread); + /* If PeerDirect interface was not detected try to detect it again + * in case if network driver was loaded later. + */ + kfd_init_peer_direct(); INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler); return process; @@ -1595,14 +1666,16 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->qpd.pqm = &p->pqm; pdd->qpd.evicted = 0; pdd->qpd.mapped_gws_queue = false; + mutex_init(&pdd->qpd.doorbell_lock); pdd->process = p; pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false; pdd->runtime_inuse = false; - pdd->vram_usage = 0; + atomic64_set(&pdd->vram_usage, 0); pdd->sdma_past_activity_counter = 0; pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); + kfd_spm_init_process_device(pdd); if (dev->kfd->shared_resources.enable_mes) { retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, @@ -1610,7 +1683,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, &pdd->proc_ctx_bo, &pdd->proc_ctx_gpu_addr, &pdd->proc_ctx_cpu_ptr, - false); + false, true); if (retval) { dev_err(dev->adev->dev, "failed to allocate process context bo\n"); @@ -1758,9 +1831,50 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev, * Assumes that the process lock is held. 
 */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
-					void *mem)
+					void *mem, uint64_t start,
+					uint64_t length, uint64_t cpuva,
+					unsigned int mem_type,
+					int preferred_id)
 {
-	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
+	int handle;
+	struct kfd_bo *buf_obj;
+	struct kfd_process *p;
+
+	p = pdd->process;
+
+	buf_obj = kzalloc(sizeof(*buf_obj), GFP_KERNEL);
+
+	if (!buf_obj)
+		return -ENOMEM;
+
+	buf_obj->it.start = start;
+	buf_obj->it.last = start + length - 1;
+	interval_tree_insert(&buf_obj->it, &p->bo_interval_tree);
+
+	buf_obj->mem = mem;
+	buf_obj->dev = pdd->dev;
+	buf_obj->cpuva = cpuva;
+	buf_obj->mem_type = mem_type;
+
+	if (preferred_id < 0)
+		handle = idr_alloc(&pdd->alloc_idr, buf_obj, 0, 0, GFP_KERNEL);
+	else
+		handle = idr_alloc(&pdd->alloc_idr, buf_obj, preferred_id,
+				   preferred_id + 1, GFP_KERNEL);
+
+	if (handle < 0) {
+		/* Undo the interval-tree insert before freeing the wrapper */
+		interval_tree_remove(&buf_obj->it, &p->bo_interval_tree);
+		kfree(buf_obj);
+	}
+
+	return handle;
+}
+
+struct kfd_bo *kfd_process_device_find_bo(struct kfd_process_device *pdd,
+					int handle)
+{
+	if (handle < 0)
+		return NULL;
+
+	return (struct kfd_bo *)idr_find(&pdd->alloc_idr, handle);
 }

 /* Translate specific handle from process local memory idr
@@ -1769,10 +1883,37 @@
 void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
 					int handle)
 {
-	if (handle < 0)
+	struct kfd_bo *buf_obj;
+
+	buf_obj = kfd_process_device_find_bo(pdd, handle);
+
+	/* Guard against invalid or stale handles */
+	if (!buf_obj)
+		return NULL;
+
+	return buf_obj->mem;
+}
+
+void *kfd_process_find_bo_from_interval(struct kfd_process *p,
+					uint64_t start_addr,
+					uint64_t last_addr)
+{
+	struct interval_tree_node *it_node;
+	struct kfd_bo *buf_obj;
+
+	it_node = interval_tree_iter_first(&p->bo_interval_tree,
+					start_addr, last_addr);
+	if (!it_node) {
+		pr_err("0x%llx-0x%llx does not relate to an existing buffer\n",
+				start_addr, last_addr);
 		return NULL;
+	}
+
+	if (interval_tree_iter_next(it_node, start_addr, last_addr)) {
+		pr_err("0x%llx-0x%llx spans more than a single BO\n",
+				start_addr, last_addr);
+		return NULL;
+	}
+
+	buf_obj = container_of(it_node, struct kfd_bo, it);

-	return idr_find(&pdd->alloc_idr, handle);
+	return buf_obj;
 }

 /* Remove specific handle from process local memory idr
@@ -1781,8 +1922,21 @@
 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
 					int handle)
 {
-	if (handle >= 0)
-		idr_remove(&pdd->alloc_idr, handle);
+	struct kfd_bo *buf_obj;
+	struct kfd_process *p;
+
+	p = pdd->process;
+
+	if (handle < 0)
+		return;
+
+	buf_obj = kfd_process_device_find_bo(pdd, handle);
+	if (!buf_obj)
+		return;
+
+	idr_remove(&pdd->alloc_idr, handle);
+
+	interval_tree_remove(&buf_obj->it, &p->bo_interval_tree);
+
+	kfree(buf_obj);
 }

 /* This increments the process->ref counter. */
@@ -1827,9 +1981,9 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
 * Eviction is reference-counted per process-device. This means multiple
 * evictions from different sources can be nested safely.
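 * A process's queues are only actually restored once every nested eviction
 * has been lifted, i.e. the per-device eviction count drops back to zero.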
 */
-int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
+int kfd_process_evict_queues(struct kfd_process *p, bool force, uint32_t trigger)
 {
-	int r = 0;
+	int r = 0, r_tmp = 0;
 	int i;
 	unsigned int n_evicted = 0;
@@ -1840,15 +1994,17 @@
 		kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
 					     trigger);

-		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
+		r_tmp = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
 							    &pdd->qpd);
 		/* Eviction returns -EIO if HWS hangs or the ASIC is resetting.
 		 * In that case we still want to set all the queues to the
 		 * evicted state, to prevent them from being added back, since
 		 * they are not actually saved right now.
 		 */
-		if (r && r != -EIO) {
+		if (r_tmp && r_tmp != -EIO) {
 			dev_err(dev, "Failed to evict process queues\n");
-			goto fail;
+			r = r_tmp;
+			if (!force)
+				goto fail;
 		}
 		n_evicted++;
@@ -1946,6 +2102,95 @@ static int signal_eviction_fence(struct kfd_process *p)
 	return ret;
 }

+void kfd_process_schedule_restore(struct kfd_process *p)
+{
+	unsigned long evicted_jiffies;
+	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_RESTORE_TIME_MS);
+
+	/* wait at least PROCESS_RESTORE_TIME_MS before attempting to restore
+	 */
+	evicted_jiffies = get_jiffies_64() - p->last_evict_timestamp;
+	if (delay_jiffies > evicted_jiffies)
+		delay_jiffies -= evicted_jiffies;
+	else
+		delay_jiffies = 0;
+
+	pr_debug("Process %d schedule restore work\n", p->pasid);
+	if (mod_delayed_work(kfd_restore_wq, &p->restore_work, delay_jiffies))
+		kfd_process_restore_queues(p);
+}
+
+static void kfd_process_unmap_doorbells(struct kfd_process *p)
+{
+	struct mm_struct *mm = p->mm;
+	int i;
+
+	mmap_write_lock(mm);
+
+	for (i = 0; i < p->n_pdds; i++)
+		kfd_doorbell_unmap(p->pdds[i]);
+
+	mmap_write_unlock(mm);
+}
+
+int kfd_process_remap_doorbells_locked(struct kfd_process *p)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		ret = kfd_doorbell_remap(p->pdds[i]);
+
+	return ret;
+}
+
+static int kfd_process_remap_doorbells(struct kfd_process *p)
+{
+	struct mm_struct *mm = p->mm;
+	int ret = 0;
+
+	mmap_write_lock(mm);
+	ret = kfd_process_remap_doorbells_locked(p);
+	mmap_write_unlock(mm);
+
+	return ret;
+}
+
+/**
+ * kfd_process_unmap_doorbells_if_idle - Check if the process' queues are active
+ *
+ * Returns true, after unmapping the doorbells, if all queues are idle.
+ * Returns false if any queue is still active.
+ */
+static bool kfd_process_unmap_doorbells_if_idle(struct kfd_process *p)
+{
+	bool busy = false;
+	int i;
+
+	if (!keep_idle_process_evicted)
+		return false;
+
+	/* Unmap doorbells first to avoid race conditions.
Otherwise while the + * second queue is checked, the first queue may get more work, but we + * won't detect that since it has been checked + */ + kfd_process_unmap_doorbells(p); + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + busy = check_if_queues_active(pdd->qpd.dqm, &pdd->qpd); + if (busy) + break; + } + + /* Remap doorbell if process queue is not idle */ + if (busy) + kfd_process_remap_doorbells(p); + + return !busy; +} + static void evict_process_worker(struct work_struct *work) { int ret; @@ -1958,9 +2203,12 @@ static void evict_process_worker(struct work_struct *work) * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, eviction_work); + trace_kfd_evict_process_worker_start(p); + + p->last_evict_timestamp = get_jiffies_64(); pr_debug("Started evicting pasid 0x%x\n", p->pasid); - ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM); + ret = kfd_process_evict_queues(p, false, KFD_QUEUE_EVICTION_TRIGGER_TTM); if (!ret) { /* If another thread already signaled the eviction fence, * they are responsible stopping the queues and scheduling @@ -1974,6 +2222,7 @@ static void evict_process_worker(struct work_struct *work) pr_debug("Finished evicting pasid 0x%x\n", p->pasid); } else pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid); + trace_kfd_evict_process_worker_end(p, ret ? "Failed" : "Success"); } static int restore_process_helper(struct kfd_process *p) @@ -2009,7 +2258,15 @@ static void restore_process_worker(struct work_struct *work) * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, restore_work); + + if (kfd_process_unmap_doorbells_if_idle(p)) { + pr_debug("Process %d queues idle, doorbell unmapped\n", + p->pasid); + return; + } + pr_debug("Started restoring pasid 0x%x\n", p->pasid); + trace_kfd_restore_process_worker_start(p); /* Setting last_restore_timestamp before successful restoration. * Otherwise this would have to be set by KGD (restore_process_bos) @@ -2030,10 +2287,15 @@ static void restore_process_worker(struct work_struct *work) if (mod_delayed_work(kfd_restore_wq, &p->restore_work, msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) kfd_process_restore_queues(p); + trace_kfd_restore_process_worker_end(p, ret ? 
+ "Rescheduled restore" : + "Failed to reschedule restore"); + } else { + trace_kfd_restore_process_worker_end(p, "Success"); } } -void kfd_suspend_all_processes(void) +void kfd_suspend_all_processes(bool force) { struct kfd_process *p; unsigned int temp; @@ -2041,7 +2303,7 @@ void kfd_suspend_all_processes(void) WARN(debug_evictions, "Evicting all processes"); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) + if (kfd_process_evict_queues(p, force, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); signal_eviction_fence(p); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 20ea745729ee3..82ddd13453d06 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -235,7 +235,7 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, unsigned int qid) + unsigned int qid) { int retval; @@ -260,7 +260,7 @@ static int init_user_queue(struct process_queue_manager *pqm, &(*q)->gang_ctx_bo, &(*q)->gang_ctx_gpu_addr, &(*q)->gang_ctx_cpu_ptr, - false); + false, true); if (retval) { pr_err("failed to allocate gang context bo\n"); goto cleanup; @@ -300,7 +300,6 @@ static int init_user_queue(struct process_queue_manager *pqm, int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_node *dev, - struct file *f, struct queue_properties *properties, unsigned int *qid, const struct kfd_criu_queue_priv_data *q_data, @@ -374,7 +373,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, * allocate_sdma_queue() in create_queue() has the * corresponding check logic. 
*/ - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); + retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -395,7 +394,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, *qid); + retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -517,9 +516,12 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval) goto err_destroy_queue; - kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); + + if (retval == -ERESTARTSYS) + return retval; + if (retval) { pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", pqm->process->pasid, @@ -527,6 +529,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + kfd_procfs_del_queue(pqn->q); kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); @@ -1029,8 +1032,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, - NULL); + ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); goto exit; @@ -1046,6 +1048,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, pr_debug("Queue id %d was restored successfully\n", queue_id); kfree(q_data); + kfree(q_extra_data); return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ea6a8e43bd5b2..4a590dfc479b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -129,7 +129,7 @@ static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user, struct kfd_smi_client *client = filep->private_data; uint64_t events; - if (!access_ok(user, size) || size < sizeof(events)) + if (!kcl_access_ok(user, size) || size < sizeof(events)) return -EFAULT; if (copy_from_user(&events, user, sizeof(events))) return -EFAULT; @@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset, amdgpu_reset_get_desc(reset_context, reset_cause, sizeof(reset_cause)); - kfd_smi_event_add(0, dev, event, "%x %s\n", - dev->reset_seq_num, - reset_cause); + kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET( + dev->reset_seq_num, reset_cause)); } void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, uint64_t throttle_bitmask) { - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING( throttle_bitmask, - amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); + amdgpu_dpm_get_thermal_throttling_counter(dev->adev))); } void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) @@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) if (task_info) { /* Report VM faults from user applications, not retry from kernel */ if (task_info->pid) - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", - task_info->pid, task_info->task_name); + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT( + task_info->pid, task_info->task_name)); 
amdgpu_vm_put_task_info(task_info); } } @@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, ktime_t ts) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START, - "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid, - address, node->id, write_fault ? 'W' : 'R'); + KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid, + address, node->id, write_fault ? 'W' : 'R')); } void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid, unsigned long address, bool migration) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END, - "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(), - pid, address, node->id, migration ? 'M' : 'U'); + KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(), + pid, address, node->id, migration ? 'M' : 'U')); } void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, @@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START, - "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", + KFD_EVENT_FMT_MIGRATE_START( ktime_get_boottime_ns(), pid, start, end - start, - from, to, prefetch_loc, preferred_loc, trigger); + from, to, prefetch_loc, preferred_loc, trigger)); } void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, @@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, uint32_t from, uint32_t to, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END, - "%lld -%d @%lx(%lx) %x->%x %d\n", + KFD_EVENT_FMT_MIGRATE_END( ktime_get_boottime_ns(), pid, start, end - start, - from, to, trigger); + from, to, trigger)); } void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION, - "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, - node->id, trigger); + KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid, + node->id, trigger)); } void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x\n", ktime_get_boottime_ns(), pid, - node->id); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid, + node->id, 0)); } void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) @@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) kfd_smi_event_add(p->lead_thread->pid, pdd->dev, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x %c\n", ktime_get_boottime_ns(), - p->lead_thread->pid, pdd->dev->id, 'R'); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), + p->lead_thread->pid, pdd->dev->id, 'R')); } kfd_unref_process(p); } @@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU, - "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(), - pid, address, last - address + 1, node->id, trigger); + KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(), + pid, address, last - address + 1, node->id, trigger)); } int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_spm.c b/drivers/gpu/drm/amd/amdkfd/kfd_spm.c new file mode 100644 index 0000000000000..d6a03240f36af --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_spm.c @@ -0,0 +1,499 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "kfd_priv.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_irq.h" +#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" +#include "ivsrcid/ivsrcid_vislands30.h" +#include // for use_mm() +#include + +struct user_buf { + uint64_t __user *user_addr; + u32 ubufsize; +}; + +struct kfd_spm_cntr { + struct user_buf ubuf; + struct mutex spm_worker_mutex; + u64 gpu_addr; + u32 ring_size; + u32 ring_rptr; + u32 size_copied; + u32 has_data_loss; + u32 *cpu_addr; + void *spm_obj; + wait_queue_head_t spm_buf_wq; + bool has_user_buf; + bool is_user_buf_filled; + bool is_spm_started; +}; + +/* used to detect SPM overflow */ +#define SPM_OVERFLOW_MAGIC 0xBEEFABCDDEADABCD + +static void kfd_spm_preset(struct kfd_process_device *pdd, u32 size) +{ + uint64_t *overflow_ptr, *overflow_end_ptr; + + overflow_ptr = (uint64_t *)((uint64_t)pdd->spm_cntr->cpu_addr + + pdd->spm_cntr->ring_size + 0x20); + overflow_end_ptr = overflow_ptr + (size >> 3); + /* SPM data filling is 0x20 alignment */ + for ( ; overflow_ptr < overflow_end_ptr; overflow_ptr += 4) + *overflow_ptr = SPM_OVERFLOW_MAGIC; +} + +static int kfd_spm_data_copy(struct kfd_process_device *pdd, u32 size_to_copy) +{ + struct kfd_spm_cntr *spm = pdd->spm_cntr; + uint64_t __user *user_address; + uint64_t *ring_buf; + u32 user_buf_space_left; + int ret = 0; + + if (spm->ubuf.user_addr == NULL) + return -EFAULT; + + user_address = (uint64_t *)((uint64_t)spm->ubuf.user_addr + spm->size_copied); + /* From RLC spec, ring_rptr = 0 points to spm->cpu_addr + 0x20 */ + ring_buf = (uint64_t *)((uint64_t)spm->cpu_addr + spm->ring_rptr + 0x20); + + if (user_address == NULL) + return -EFAULT; + + user_buf_space_left = spm->ubuf.ubufsize - spm->size_copied; + + if (size_to_copy < user_buf_space_left) { + ret = copy_to_user(user_address, ring_buf, size_to_copy); + if (ret) { + spm->has_data_loss = true; + return -EFAULT; + } + spm->size_copied += size_to_copy; + spm->ring_rptr += size_to_copy; + } else { + ret = copy_to_user(user_address, ring_buf, user_buf_space_left); + if (ret) { + spm->has_data_loss = true; + return -EFAULT; + } + + spm->size_copied = spm->ubuf.ubufsize; + spm->ring_rptr += user_buf_space_left; + WRITE_ONCE(spm->is_user_buf_filled, true); + wake_up(&pdd->spm_cntr->spm_buf_wq); + } + + return ret; +} + +static int kfd_spm_read_ring_buffer(struct kfd_process_device *pdd) +{ + struct kfd_spm_cntr *spm = pdd->spm_cntr; + u32 overflow_size = 0; + u32 size_to_copy; + 
int ret = 0;
+	u32 ring_wptr;
+
+	ring_wptr = READ_ONCE(spm->cpu_addr[0]);
+
+	/* SPM might stall if we cannot copy data out of the SPM ring buffer.
+	 * spm->has_data_loss is only a hint here, since a stall is only a
+	 * possibility and data loss might not actually happen. But it is a
+	 * useful hint for a user mode profiler to take extra actions.
+	 */
+	if (!spm->has_user_buf || spm->is_user_buf_filled) {
+		spm->has_data_loss = true;
+		/* Set the flag here because nothing else sets it when
+		 * reading the ring buffer times out.
+		 */
+		if (!spm->is_user_buf_filled)
+			spm->is_user_buf_filled = true;
+		goto exit;
+	}
+
+	if (spm->ring_rptr == ring_wptr)
+		goto exit;
+
+	if (ring_wptr > spm->ring_rptr) {
+		size_to_copy = ring_wptr - spm->ring_rptr;
+		ret = kfd_spm_data_copy(pdd, size_to_copy);
+	} else {
+		uint64_t *ring_start, *ring_end;
+
+		ring_start = (uint64_t *)((uint64_t)pdd->spm_cntr->cpu_addr + 0x20);
+		ring_end = ring_start + (pdd->spm_cntr->ring_size >> 3);
+		for ( ; overflow_size < pdd->spm_overflow_reserved; overflow_size += 0x20) {
+			uint64_t *overflow_ptr = ring_end + (overflow_size >> 3);
+
+			if (*overflow_ptr == SPM_OVERFLOW_MAGIC)
+				break;
+		}
+		/* move overflow counters into the ring buffer to avoid data loss */
+		memcpy(ring_start, ring_end, overflow_size);
+
+		size_to_copy = spm->ring_size - spm->ring_rptr;
+		ret = kfd_spm_data_copy(pdd, size_to_copy);
+
+		/* correct the counter start point */
+		if (spm->ring_size == spm->ring_rptr) {
+			if (ring_wptr == 0) {
+				/* reset rptr to the start of the ring buffer */
+				spm->ring_rptr = ring_wptr;
+				goto exit;
+			}
+			spm->ring_rptr = 0;
+			size_to_copy = ring_wptr - spm->ring_rptr;
+			if (!ret)
+				ret = kfd_spm_data_copy(pdd, size_to_copy);
+		}
+	}
+
+exit:
+	kfd_spm_preset(pdd, overflow_size);
+	amdgpu_amdkfd_rlc_spm_set_rdptr(pdd->dev->adev, spm->ring_rptr);
+	return ret;
+}
+
+static void kfd_spm_work(struct work_struct *work)
+{
+	struct kfd_process_device *pdd = container_of(work, struct kfd_process_device, spm_work);
+	struct mm_struct *mm = NULL; /* referenced */
+
+	mm = get_task_mm(pdd->process->lead_thread);
+	if (mm) {
+		kthread_use_mm(mm);
+		{ /* attach mm */
+			mutex_lock(&pdd->spm_cntr->spm_worker_mutex);
+			kfd_spm_read_ring_buffer(pdd);
+			mutex_unlock(&pdd->spm_cntr->spm_worker_mutex);
+		} /* detach mm */
+		kthread_unuse_mm(mm);
+		/* release the mm structure */
+		mmput(mm);
+	}
+}
+
+void kfd_spm_init_process_device(struct kfd_process_device *pdd)
+{
+	/* pre-gfx11 SPM has a hardware bug that can cause overflow */
+	if (pdd->dev->adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 1))
+		pdd->spm_overflow_reserved = 0x400;
+
+	mutex_init(&pdd->spm_mutex);
+	pdd->spm_cntr = NULL;
+}
+
+static int kfd_acquire_spm(struct kfd_process_device *pdd, struct amdgpu_device *adev)
+{
+	int ret = 0;
+
+	mutex_lock(&pdd->spm_mutex);
+
+	if (pdd->spm_cntr) {
+		mutex_unlock(&pdd->spm_mutex);
+		return -EINVAL;
+	}
+
+	pdd->spm_cntr = kzalloc(sizeof(struct kfd_spm_cntr), GFP_KERNEL);
+	if (!pdd->spm_cntr) {
+		mutex_unlock(&pdd->spm_mutex);
+		return -ENOMEM;
+	}
+
+	/* get a 4 MB SPM ring buffer */
+	pdd->spm_cntr->ring_size = order_base_2(4 * 1024 * 1024/4);
+	pdd->spm_cntr->ring_size = (1 << pdd->spm_cntr->ring_size) * 4;
+	pdd->spm_cntr->has_user_buf = false;
+
+	ret = amdgpu_amdkfd_alloc_gtt_mem(adev,
+			pdd->spm_cntr->ring_size, &pdd->spm_cntr->spm_obj,
+			&pdd->spm_cntr->gpu_addr, (void *)&pdd->spm_cntr->cpu_addr,
+			false, false);
+
+	if (ret)
+		goto alloc_gtt_mem_failure;
+
+	/* reserve space to fix spm overflow */
+	pdd->spm_cntr->ring_size -= pdd->spm_overflow_reserved;
+	ret =
amdgpu_amdkfd_rlc_spm_acquire(adev, drm_priv_to_vm(pdd->drm_priv), + pdd->spm_cntr->gpu_addr, pdd->spm_cntr->ring_size); + + /* + * By definition, the last 8 DWs of the buffer are not part of the rings + * and are instead part of the Meta data area. + */ + pdd->spm_cntr->ring_size -= 0x20; + + if (ret) + goto acquire_spm_failure; + + mutex_init(&pdd->spm_cntr->spm_worker_mutex); + + init_waitqueue_head(&pdd->spm_cntr->spm_buf_wq); + INIT_WORK(&pdd->spm_work, kfd_spm_work); + + spin_lock_init(&pdd->spm_irq_lock); + + kfd_spm_preset(pdd, pdd->spm_overflow_reserved); + + goto out; + +acquire_spm_failure: + amdgpu_amdkfd_free_gtt_mem(adev, &pdd->spm_cntr->spm_obj); + +alloc_gtt_mem_failure: + kfree(pdd->spm_cntr); + pdd->spm_cntr = NULL; + +out: + mutex_unlock(&pdd->spm_mutex); + return ret; +} + +int kfd_release_spm(struct kfd_process_device *pdd, struct amdgpu_device *adev) +{ + unsigned long flags; + + mutex_lock(&pdd->spm_mutex); + if (!pdd->spm_cntr) { + mutex_unlock(&pdd->spm_mutex); + return -EINVAL; + } + + spin_lock_irqsave(&pdd->spm_irq_lock, flags); + pdd->spm_cntr->is_spm_started = false; + spin_unlock_irqrestore(&pdd->spm_irq_lock, flags); + + flush_work(&pdd->spm_work); + wake_up_all(&pdd->spm_cntr->spm_buf_wq); + + amdgpu_amdkfd_rlc_spm_release(adev, drm_priv_to_vm(pdd->drm_priv)); + amdgpu_amdkfd_free_gtt_mem(adev, &pdd->spm_cntr->spm_obj); + + spin_lock_irqsave(&pdd->spm_irq_lock, flags); + kfree(pdd->spm_cntr); + pdd->spm_cntr = NULL; + spin_unlock_irqrestore(&pdd->spm_irq_lock, flags); + + mutex_unlock(&pdd->spm_mutex); + return 0; +} + +static void spm_update_dest_info(struct kfd_process_device *pdd, + struct kfd_ioctl_spm_args *user_spm_data) +{ + struct kfd_spm_cntr *spm = pdd->spm_cntr; + mutex_lock(&pdd->spm_cntr->spm_worker_mutex); + if (spm->has_user_buf) { + user_spm_data->bytes_copied = spm->size_copied; + user_spm_data->has_data_loss = spm->has_data_loss; + spm->has_user_buf = false; + } + if (user_spm_data->dest_buf) { + spm->ubuf.user_addr = (uint64_t *)user_spm_data->dest_buf; + spm->ubuf.ubufsize = user_spm_data->buf_size; + spm->has_data_loss = false; + spm->size_copied = 0; + spm->is_user_buf_filled = false; + spm->has_user_buf = true; + } + mutex_unlock(&pdd->spm_cntr->spm_worker_mutex); +} + +static int spm_wait_for_fill_awake(struct kfd_spm_cntr *spm, + struct kfd_ioctl_spm_args *user_spm_data) +{ + int ret = 0; + + long timeout = msecs_to_jiffies(user_spm_data->timeout); + long start_jiffies = jiffies; + + ret = wait_event_interruptible_timeout(spm->spm_buf_wq, + (READ_ONCE(spm->is_user_buf_filled) == true), + timeout); + + switch (ret) { + case -ERESTARTSYS: + /* Subtract elapsed time from timeout so we wait that much + * less when the call gets restarted. 
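+		 * If the remaining timeout has already elapsed, give up and
+		 * report -ETIME instead of restarting.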
+ */ + timeout -= (jiffies - start_jiffies); + if (timeout <= 0) { + ret = -ETIME; + timeout = 0; + pr_debug("[%s] interrupted by signal\n", __func__); + } + break; + + case 0: + default: + timeout = ret; + ret = 0; + break; + } + user_spm_data->timeout = jiffies_to_msecs(timeout); + + return ret; +} + +static int kfd_set_dest_buffer(struct kfd_process_device *pdd, struct amdgpu_device *adev, void *data) +{ + struct kfd_ioctl_spm_args *user_spm_data; + struct kfd_spm_cntr *spm; + unsigned long flags; + int ret = 0; + + user_spm_data = (struct kfd_ioctl_spm_args *) data; + + mutex_lock(&pdd->spm_mutex); + spm = pdd->spm_cntr; + + if (spm == NULL) { + mutex_unlock(&pdd->spm_mutex); + return -EINVAL; + } + + if (user_spm_data->timeout && spm->has_user_buf && + !READ_ONCE(spm->is_user_buf_filled)) { + ret = spm_wait_for_fill_awake(spm, user_spm_data); + if (ret == -ETIME) { + /* Copy (partial) data to user buffer after a timeout */ + schedule_work(&pdd->spm_work); + flush_work(&pdd->spm_work); + /* This is not an error */ + ret = 0; + } else if (ret) { + /* handle other errors normally, including -ERESTARTSYS */ + mutex_unlock(&pdd->spm_mutex); + return ret; + } + } else if (!user_spm_data->timeout && spm->has_user_buf) { + /* Copy (partial) data to user buffer */ + schedule_work(&pdd->spm_work); + flush_work(&pdd->spm_work); + } + + if (spm->has_user_buf || user_spm_data->dest_buf) { + /* Get info about filled space in previous output buffer. + * Setup new dest buf if provided. + */ + spm_update_dest_info(pdd, user_spm_data); + } + + if (user_spm_data->dest_buf) { + /* Start SPM if necessary*/ + if (spm->is_spm_started == false) { + amdgpu_amdkfd_rlc_spm_cntl(adev, 1); + spin_lock_irqsave(&pdd->spm_irq_lock, flags); + spm->is_spm_started = true; + /* amdgpu_amdkfd_rlc_spm_cntl() will reset SPM and wptr will become 0. + * Adjust rptr accordingly + */ + spm->ring_rptr = 0; + spin_unlock_irqrestore(&pdd->spm_irq_lock, flags); + } else { + /* If SPM was already started, there may already + * be data in the ring-buffer that needs to be read. + */ + schedule_work(&pdd->spm_work); + } + } else { + amdgpu_amdkfd_rlc_spm_cntl(adev, 0); + spin_lock_irqsave(&pdd->spm_irq_lock, flags); + spm->is_spm_started = false; + /* amdgpu_amdkfd_rlc_spm_cntl() will reset SPM and wptr will become 0. + * Adjust rptr accordingly + */ + spm->ring_rptr = 0; + spin_unlock_irqrestore(&pdd->spm_irq_lock, flags); + } + + mutex_unlock(&pdd->spm_mutex); + + return ret; +} + +int kfd_rlc_spm(struct kfd_process *p, void *data) +{ + struct kfd_ioctl_spm_args *args = data; + struct kfd_node *dev; + struct kfd_process_device *pdd; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + return -EINVAL; + } + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EINVAL; + + switch (args->op) { + case KFD_IOCTL_SPM_OP_ACQUIRE: + dev->spm_pasid = p->pasid; + return kfd_acquire_spm(pdd, dev->adev); + + case KFD_IOCTL_SPM_OP_RELEASE: + return kfd_release_spm(pdd, dev->adev); + + case KFD_IOCTL_SPM_OP_SET_DEST_BUF: + return kfd_set_dest_buffer(pdd, dev->adev, data); + + default: + return -EINVAL; + } + + return -EINVAL; +} + +void kgd2kfd_spm_interrupt(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_node *dev = kfd->nodes[0]; + uint16_t pasid = dev->spm_pasid; + + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + unsigned long flags; + + if (!p) { + pr_debug("kfd_spm_interrupt p = %p\n", p); + return; /* Presumably process exited. 
 */
+	}
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		/* drop the reference taken by kfd_lookup_process_by_pasid() */
+		kfd_unref_process(p);
+		return;
+	}
+
+	spin_lock_irqsave(&pdd->spm_irq_lock, flags);
+
+	if (pdd->spm_cntr && pdd->spm_cntr->is_spm_started)
+		schedule_work(&pdd->spm_work);
+	spin_unlock_irqrestore(&pdd->spm_irq_lock, flags);
+
+	kfd_unref_process(p);
+}
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2339bbdf452fb..8eb37bd0ebcae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -184,7 +184,11 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				   bo_adev->vm_manager.vram_base_offset -
+#ifdef HAVE_DEV_PAGEMAP_RANGE
 				   bo_adev->kfd.pgmap.range.start;
+#else
+				   bo_adev->kfd.dev->pgmap.res.start;
+#endif
 			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
 			pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
 			continue;
@@ -309,12 +313,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 }

 static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
-				 uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+				 int32_t *prefetch_loc, uint8_t *granularity,
+				 uint32_t *flags)
 {
 	*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
 	*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
-	*granularity = 9;
+	*granularity = svms->default_granularity;
 	*flags = KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
 }
@@ -358,7 +363,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
 		    MAX_GPU_INSTANCE);

-	svm_range_set_default_attributes(&prange->preferred_loc,
+	svm_range_set_default_attributes(svms, &prange->preferred_loc,
 					 &prange->prefetch_loc,
 					 &prange->granularity, &prange->flags);
@@ -404,6 +409,27 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * A forked child process takes a reference on the svm_bo
+		 * device pages, so the svm_bo could be released after the
+		 * parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence.
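		 * here ourselves so that anything still waiting on the
		 * eviction fence is released before the BO goes away.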
*/ dma_fence_signal(&svm_bo->eviction_fence->base); @@ -531,6 +557,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear) { + struct kfd_process_device *pdd; struct amdgpu_bo_param bp; struct svm_range_bo *svm_bo; struct amdgpu_bo_user *ubo; @@ -603,7 +630,7 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, } } - r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&bo->tbo), 1); if (r) { pr_debug("failed %d to reserve bo\n", r); amdgpu_bo_unreserve(bo); @@ -622,6 +649,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, list_add(&prange->svm_bo_list, &svm_bo->range_list); spin_unlock(&svm_bo->list_lock); + pdd = svm_range_get_pdd_by_node(prange, node); + if (pdd) + atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage); + return 0; reserve_bo_failed: @@ -1249,13 +1280,7 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ? @@ -2262,16 +2287,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(&svms->drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2291,8 +2310,6 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2314,17 +2331,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. - */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2445,6 +2453,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct kfd_process *p; unsigned long s, l; bool unmap_parent; + uint32_t i; if (atomic_read(&prange->queue_refcount)) { int r; @@ -2464,11 +2473,38 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". 
+	/* Calculate timestamps that are used to decide which page faults need
+	 * to be dropped or handled before unmapping pages from the GPU VM
 	 */
-	atomic_inc(&svms->drain_pagefaults);
+	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+		struct kfd_process_device *pdd;
+		struct amdgpu_device *adev;
+		struct amdgpu_ih_ring *ih;
+		uint32_t checkpoint_wptr;
+
+		pdd = p->pdds[i];
+		if (!pdd)
+			continue;
+
+		adev = pdd->dev->adev;
+
+		/* Check and drain the ih1 ring if the CAM is not available */
+		if (adev->irq.ih1.ring_size) {
+			ih = &adev->irq.ih1;
+			checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+			if (ih->rptr != checkpoint_wptr) {
+				svms->checkpoint_ts[i] =
+					amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+				continue;
+			}
+		}
+
+		/* check if dev->irq.ih_soft is not empty */
+		ih = &adev->irq.ih_soft;
+		checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (ih->rptr != checkpoint_wptr)
+			svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+	}

 	unmap_parent = start <= prange->start && last >= prange->last;
@@ -2692,9 +2728,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 	*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);

 	start_limit = max(vma->vm_start >> PAGE_SHIFT,
-			  (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+			  (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
 	end_limit = min(vma->vm_end >> PAGE_SHIFT,
-			(unsigned long)ALIGN(addr + 1, 2UL << 8));
+			(unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
 	/* First range that starts after the fault address */
 	node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
 	if (node) {
@@ -2909,7 +2946,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
-			uint64_t addr, bool write_fault)
+			uint64_t addr, uint64_t ts, bool write_fault)
 {
 	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
@@ -2919,7 +2956,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	ktime_t timestamp = ktime_get_boottime();
 	struct kfd_node *node;
 	int32_t best_loc;
-	int32_t gpuidx = MAX_GPU_INSTANCE;
+	int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
 	bool migration = false;
@@ -2940,11 +2977,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);

 	if (atomic_read(&svms->drain_pagefaults)) {
-		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
 		r = 0;
 		goto out;
 	}

+	node = kfd_node_by_irq_ids(adev, node_id, vmid);
+	if (!node) {
+		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+			 vmid);
+		r = -EFAULT;
+		goto out;
+	}
+
+	if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid/gpuidx for node_id: %d\n", node_id);
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* check if this page fault's timestamp is before svms->checkpoint_ts */
+	if (svms->checkpoint_ts[gpuidx] != 0) {
+		if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
+			pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+			r = 0;
+			goto out;
+		} else
+			/* ts is after svms->checkpoint_ts now; reset
+			 * svms->checkpoint_ts to zero so that a later
+			 * timestamp wrap-around cannot produce a wrong
+			 * comparison.
+			 */
+			svms->checkpoint_ts[gpuidx] = 0;
+	}
+
 	if (!p->xnack_enabled) {
		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
if (!p->xnack_enabled) { + pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -2961,13 +3025,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, vmid); - if (!node) { - pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, - vmid); - r = -EFAULT; - goto out; - } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -3182,8 +3239,9 @@ void svm_range_list_fini(struct kfd_process *p) /* * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. + * Stop kfd page fault handling, then wait until pending page faults are drained */ - atomic_inc(&p->svms.drain_pagefaults); + atomic_set(&p->svms.drain_pagefaults, 1); svm_range_drain_retry_fault(&p->svms); list_for_each_entry_safe(prange, next, &p->svms.list, list) { @@ -3217,6 +3275,12 @@ int svm_range_list_init(struct kfd_process *p) if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev)) bitmap_set(svms->bitmap_supported, i, 1); + /* Value of default granularity cannot exceed 0x1B, the + * number of pages supported by a 4-level paging table + */ + svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B); + pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity); + return 0; } @@ -3744,7 +3808,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm, node = interval_tree_iter_first(&svms->objects, start, last); if (!node) { pr_debug("range attrs not found return default values\n"); - svm_range_set_default_attributes(&location, &prefetch_loc, + svm_range_set_default_attributes(svms, &location, &prefetch_loc, &granularity, &flags_and); flags_or = flags_and; if (p->xnack_enabled) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 747325a2ea896..bddd24f04669e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -174,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t vmid, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, @@ -225,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p) static inline int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t client_id, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3871591c9aec9..7ccd9983e1368 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -224,6 +224,8 @@ struct kfd_topology_device *kfd_create_topology_device( sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value) #define sysfs_show_32bit_val(buffer, offs, value) \ sysfs_show_gen_prop(buffer, offs, "%u\n", value) +#define sysfs_show_64bit_val(buffer, offs, value) \ + sysfs_show_gen_prop(buffer, offs, "%llu\n", value) #define sysfs_show_str_val(buffer, offs, value) \ sysfs_show_gen_prop(buffer, offs, "%s\n", value)
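For reference, the default_granularity initialized above later defines the alignment window used in svm_range_get_range_boundaries. A small userspace sketch of that windowing arithmetic (addresses are in page units; names and values here are illustrative only):

#include <stdio.h>

#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))          /* a must be a power of two */
#define ALIGN(x, a)      ALIGN_DOWN((x) + (a) - 1, (a))

int main(void)
{
	unsigned long addr = 0x12345;    /* faulting page number */
	unsigned int granularity = 9;    /* clamped to at most 0x1B in the patch */
	unsigned long window = 1UL << granularity;

	printf("start %#lx end %#lx\n",
	       ALIGN_DOWN(addr, window),  /* window-aligned start */
	       ALIGN(addr + 1, window));  /* window-aligned end */
	return 0;
}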
@@ -313,11 +315,25 @@ static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, { int offs = 0; struct kfd_mem_properties *mem; + uint64_t used_mem; /* Making sure that the buffer is an empty string */ buffer[0] = 0; - mem = container_of(attr, struct kfd_mem_properties, attr); + if (strcmp(attr->name, "used_memory") == 0) { + mem = container_of(attr, struct kfd_mem_properties, + attr_used); + if (mem->gpu) { + if (kfd_devcgroup_check_permission(mem->gpu)) + return -EPERM; + used_mem = amdgpu_amdkfd_get_vram_usage(mem->gpu->adev); + return sysfs_show_64bit_val(buffer, offs, used_mem); + } + /* TODO: Report APU/CPU-allocated memory; for now return 0 */ + return 0; + } + + mem = container_of(attr, struct kfd_mem_properties, attr_props); if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu)) return -EPERM; sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type); @@ -575,6 +591,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_mem_properties *mem; struct kfd_perf_properties *perf; + if (dev->kobj_p2plink) { + list_for_each_entry(p2plink, &dev->p2p_link_props, list) + if (p2plink->kobj) { + kfd_remove_sysfs_file(p2plink->kobj, + &p2plink->attr); + p2plink->kobj = NULL; + } + kobject_del(dev->kobj_p2plink); + kobject_put(dev->kobj_p2plink); + dev->kobj_p2plink = NULL; + } + if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) if (iolink->kobj) { @@ -614,7 +642,12 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) if (dev->kobj_mem) { list_for_each_entry(mem, &dev->mem_props, list) if (mem->kobj) { - kfd_remove_sysfs_file(mem->kobj, &mem->attr); + /* TODO: Remove when CPU/APU supported */ + if (dev->node_props.cpu_cores_count == 0) + sysfs_remove_file(mem->kobj, + &mem->attr_used); + kfd_remove_sysfs_file(mem->kobj, + &mem->attr_props); mem->kobj = NULL; } kobject_del(dev->kobj_mem); @@ -650,9 +683,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; struct kfd_perf_properties *perf; - int ret; - uint32_t i, num_attrs; + uint32_t num_attrs; struct attribute **attrs; + int ret; + uint32_t i; if (WARN_ON(dev->kobj_node)) return -EEXIST; @@ -725,12 +759,23 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, return ret; } - mem->attr.name = "properties"; - mem->attr.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&mem->attr); - ret = sysfs_create_file(mem->kobj, &mem->attr); + mem->attr_props.name = "properties"; + mem->attr_props.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&mem->attr_props); + ret = sysfs_create_file(mem->kobj, &mem->attr_props); if (ret < 0) return ret; + + /* TODO: Support APU/CPU memory usage */ + if (dev->node_props.cpu_cores_count == 0) { + mem->attr_used.name = "used_memory"; + mem->attr_used.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&mem->attr_used); + ret = sysfs_create_file(mem->kobj, &mem->attr_used); + if (ret < 0) + return ret; + } + i++; } @@ -1050,8 +1095,10 @@ int kfd_topology_init(void) goto err; } +#ifdef HAVE_AMD_IOMMU_PC_SUPPORTED kdev = list_first_entry(&temp_topology_device_list, struct kfd_topology_device, list); +#endif down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list,
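For reference, mem_show above serves two sysfs files from one structure: it dispatches on the attribute name and recovers the parent object with container_of from the matching embedded member. A self-contained C sketch of that pattern, with simplified illustrative types rather than the kernel's:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attr { const char *name; };

struct mem_props {
	unsigned long long size;
	struct attr attr_props;  /* backs the "properties" file */
	struct attr attr_used;   /* backs the "used_memory" file */
};

static void show(struct attr *a)
{
	struct mem_props *mem;

	/* pick the member that matches the file being read */
	if (strcmp(a->name, "used_memory") == 0)
		mem = container_of(a, struct mem_props, attr_used);
	else
		mem = container_of(a, struct mem_props, attr_props);

	printf("%s of node with size %llu\n", a->name, mem->size);
}

int main(void)
{
	struct mem_props m = { 4096, { "properties" }, { "used_memory" } };

	show(&m.attr_props);
	show(&m.attr_used);
	return 0;
}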
@@ -1225,7 +1272,15 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev, if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI) return; - /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */ + /* check whether the source dev has atomics support on the root. */ + if (dev->gpu && (!dev->gpu->kfd->pci_atomic_requested || + dev->gpu->adev->asic_type == CHIP_HAWAII)) { + link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; + return; + } + + /* check whether target_gpu_dev is atomics capable. */ if (target_gpu_dev) { uint32_t cap; @@ -1377,6 +1432,27 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); } } + + /* Indirect links have been created, so apply the flag settings to all of them */ + list_for_each_entry(link, &dev->p2p_link_props, list) { + link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(dev, NULL, link); + peer_dev = kfd_topology_device_by_proximity_domain( + link->node_to); + + if (!peer_dev) + continue; + + list_for_each_entry(inbound_link, &peer_dev->p2p_link_props, + list) { + if (inbound_link->node_to != link->node_from) + continue; + + inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; + kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); + kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + } + } } static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, @@ -1488,7 +1564,6 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g return ret; } -#if defined(CONFIG_HSA_AMD_P2P) static int kfd_add_peer_prop(struct kfd_topology_device *kdev, struct kfd_topology_device *peer, int from, int to) { @@ -1556,16 +1631,12 @@ static int kfd_add_peer_prop(struct kfd_topology_device *kdev, return ret; } -#endif static int kfd_dev_create_p2p_links(void) { struct kfd_topology_device *dev; struct kfd_topology_device *new_dev; -#if defined(CONFIG_HSA_AMD_P2P) - uint32_t i; -#endif - uint32_t k; + uint32_t i, k; int ret = 0; k = 0; @@ -1586,7 +1657,6 @@ static int kfd_dev_create_p2p_links(void) goto out; /* create p2p links */ -#if defined(CONFIG_HSA_AMD_P2P) i = 0; list_for_each_entry(dev, &topology_device_list, list) { if (dev == new_dev) @@ -1607,7 +1677,6 @@ static int kfd_dev_create_p2p_links(void) next: i++; } -#endif out: return ret; @@ -2317,7 +2386,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) if (first_cpu_of_numa_node >= nr_cpu_ids) return -1; #ifdef CONFIG_X86_64 +#ifdef HAVE_CPUINFO_TOPOLOGY_IN_CPUINFO_X86_STRUCT return cpu_data(first_cpu_of_numa_node).topo.apicid; +#else + return cpu_data(first_cpu_of_numa_node).apicid; +#endif #else return first_cpu_of_numa_node; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 155b5c410af16..22e4b2cca1fe4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -90,7 +90,8 @@ struct kfd_mem_properties { uint32_t mem_clk_max; struct kfd_node *gpu; struct kobject *kobj; - struct attribute attr; + struct attribute attr_props; + struct attribute attr_used; }; #define CACHE_SIBLINGMAP_SIZE 128
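For reference, the loop added to kfd_fill_iolink_non_crat_info walks each outbound p2p link and then applies the same flags to the matching inbound link on the peer, keeping both directions consistent. A toy C sketch of that symmetric fix-up; the structs and array layout are invented for illustration:

#include <stdio.h>

struct link { int from, to; unsigned int flags; };

#define ENABLED 0x1u

int main(void)
{
	/* two nodes, one link in each direction */
	struct link links[] = { { 0, 1, 0 }, { 1, 0, 0 } };
	int n = 2, i, j;

	for (i = 0; i < n; i++) {
		links[i].flags |= ENABLED;                 /* outbound side */
		for (j = 0; j < n; j++)
			if (links[j].from == links[i].to &&
			    links[j].to == links[i].from)
				links[j].flags |= ENABLED; /* matching inbound side */
	}

	for (i = 0; i < n; i++)
		printf("%d->%d flags %#x\n", links[i].from, links[i].to, links[i].flags);
	return 0;
}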
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/amdkfd/kfd_trace.c similarity index 91% rename from drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c rename to drivers/gpu/drm/amd/amdkfd/kfd_trace.c index bb0e1b80ec3ca..805a1da90bb15 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_trace.c @@ -1,5 +1,5 @@ /* - * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,7 +19,8 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * - * Authors: AMD - * */ + +#define CREATE_TRACE_POINTS +#include "kfd_trace.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_trace.h b/drivers/gpu/drm/amd/amdkfd/kfd_trace.h new file mode 100644 index 0000000000000..16470bec1c317 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_trace.h @@ -0,0 +1,151 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#if !defined(_AMDKFD_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _AMDKFD_TRACE_H + + +#include +#include +#include + +#include "kfd_priv.h" +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amdkfd +#define TRACE_INCLUDE_FILE kfd_trace + + +TRACE_EVENT(kfd_map_memory_to_gpu_start, + TP_PROTO(struct kfd_process *p), + TP_ARGS(p), + TP_STRUCT__entry( + __field(unsigned int, pasid) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + ), + TP_printk("pasid=%u", __entry->pasid) +); + + +TRACE_EVENT(kfd_map_memory_to_gpu_end, + TP_PROTO(struct kfd_process *p, u32 array_size, char *pStatusMsg), + TP_ARGS(p, array_size, pStatusMsg), + TP_STRUCT__entry( + __field(unsigned int, pasid) + __field(unsigned int, array_size) + __string(pStatusMsg, pStatusMsg) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + __entry->array_size = array_size; + __amdkcl_assign_str(pStatusMsg, pStatusMsg); + ), + TP_printk("pasid = %u, array_size = %u, StatusMsg=%s", + __entry->pasid, + __entry->array_size, + __get_str(pStatusMsg)) +); + + +TRACE_EVENT(kfd_kgd2kfd_schedule_evict_and_restore_process, + TP_PROTO(struct kfd_process *p, u32 delay_jiffies), + TP_ARGS(p, delay_jiffies), + TP_STRUCT__entry( + __field(unsigned int, pasid) + __field(unsigned int, delay_jiffies) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + __entry->delay_jiffies = delay_jiffies; + ), + TP_printk("pasid = %u, delay_jiffies = %u", + __entry->pasid, + __entry->delay_jiffies) +); + + +TRACE_EVENT(kfd_evict_process_worker_start, + TP_PROTO(struct kfd_process *p), + TP_ARGS(p), + TP_STRUCT__entry( + __field(unsigned int, pasid) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + ), + TP_printk("pasid=%u", __entry->pasid) +); + +
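For reference, the guard opening kfd_trace.h is deliberately not a plain include guard: trace headers are read twice, the second time via define_trace.h with TRACE_HEADER_MULTI_READ set so the TRACE_EVENT macros re-expand into definitions. A compilable two-file C sketch of that double-read trick; all file and macro names here are illustrative:

/* demo.h: re-readable header, same guard shape as kfd_trace.h */
#if !defined(_DEMO_H) || defined(HEADER_MULTI_READ)
#define _DEMO_H
MAKE_EVENT(my_event)
#endif

/* demo.c: reads the header twice with different macro expansions */
#include <stdio.h>

#define MAKE_EVENT(name) static const char *name##_name = #name;  /* pass 1: declarations */
#include "demo.h"

#undef MAKE_EVENT
#define MAKE_EVENT(name) static void name##_print(void) { puts(name##_name); }  /* pass 2: definitions */
#define HEADER_MULTI_READ
#include "demo.h"

int main(void)
{
	my_event_print();
	return 0;
}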
+TRACE_EVENT(kfd_evict_process_worker_end, + TP_PROTO(struct kfd_process *p, char *pStatusMsg), + TP_ARGS(p, pStatusMsg), + TP_STRUCT__entry( + __field(unsigned int, pasid) + __string(pStatusMsg, pStatusMsg) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + __amdkcl_assign_str(pStatusMsg, pStatusMsg); + ), + TP_printk("pasid=%u, StatusMsg=%s", + __entry->pasid, __get_str(pStatusMsg)) +); + + +TRACE_EVENT(kfd_restore_process_worker_start, + TP_PROTO(struct kfd_process *p), + TP_ARGS(p), + TP_STRUCT__entry( + __field(unsigned int, pasid) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + ), + TP_printk("pasid=%u", __entry->pasid) +); + +TRACE_EVENT(kfd_restore_process_worker_end, + TP_PROTO(struct kfd_process *p, char *pStatusMsg), + TP_ARGS(p, pStatusMsg), + TP_STRUCT__entry( + __field(unsigned int, pasid) + __string(pStatusMsg, pStatusMsg) + ), + TP_fast_assign( + __entry->pasid = p->pasid; + __amdkcl_assign_str(pStatusMsg, pStatusMsg); + ), + TP_printk("pasid=%u, StatusMsg=%s", + __entry->pasid, __get_str(pStatusMsg)) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 10138676f27fd..e5c0205f26181 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -29,6 +29,7 @@ #define SOC15_INTSRC_CP_BAD_OPCODE 183 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_VMC_UTCL2_POISON 1 #define SOC15_INTSRC_SDMA_TRAP 224 #define SOC15_INTSRC_SDMA_ECC 220 #define SOC21_INTSRC_SDMA_TRAP 49 diff --git a/drivers/gpu/drm/amd/amdxcp/Makefile b/drivers/gpu/drm/amd/amdxcp/Makefile index 870501a4bb8c0..5790475464f02 100644 --- a/drivers/gpu/drm/amd/amdxcp/Makefile +++ b/drivers/gpu/drm/amd/amdxcp/Makefile @@ -23,3 +23,6 @@ amdxcp-y := amdgpu_xcp_drv.o obj-$(CONFIG_DRM_AMDGPU) += amdxcp.o + +AMD_XCP_PATH := $(src) +include $(AMD_XCP_PATH)/backport/Makefile diff --git a/drivers/gpu/drm/amd/amdxcp/backport/Makefile b/drivers/gpu/drm/amd/amdxcp/backport/Makefile new file mode 100644 index 0000000000000..4217ff962b225 --- /dev/null +++ b/drivers/gpu/drm/amd/amdxcp/backport/Makefile @@ -0,0 +1,10 @@ +BACKPORT_OBJS := kcl_drm_drv.o + +amdxcp-y += $(addprefix ./backport/,$(BACKPORT_OBJS)) + +ccflags-y += \ + -I$(AMD_XCP_PATH)/ \ + -I$(AMD_XCP_PATH)/backport/include \ + -I$(AMD_XCP_PATH)/../dkms \ + -include config/config.h \ + -include backport/backport.h diff --git a/drivers/gpu/drm/amd/amdxcp/backport/backport.h b/drivers/gpu/drm/amd/amdxcp/backport/backport.h new file mode 100644 index 0000000000000..9550a63357fb7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdxcp/backport/backport.h @@ -0,0 +1 @@ +#include "kcl/kcl_drm_drv.h" \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdxcp/backport/include/kcl/kcl_drm_drv.h b/drivers/gpu/drm/amd/amdxcp/backport/include/kcl/kcl_drm_drv.h new file mode 100644 index 0000000000000..c331d7f60606b --- /dev/null +++ b/drivers/gpu/drm/amd/amdxcp/backport/include/kcl/kcl_drm_drv.h @@ -0,0 +1,42 @@ +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * Copyright 2016 Intel Corp.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDXCP_BACKPORT_KCL_DRM_DRV_H__ +#define __AMDXCP_BACKPORT_KCL_DRM_DRV_H__ + +#include + +/* Copied from v5.7-rc1-343-gb0b5849e0cc0 include/drm/drm_drv.h */ +#ifndef devm_drm_dev_alloc +#define AMDKCL_DEVM_DRM_DEV_ALLOC 1 +void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver, + size_t size, size_t offset); +#define devm_drm_dev_alloc(parent, driver, type, member) \ + ((type *) __devm_drm_dev_alloc(parent, driver, sizeof(type), \ + offsetof(type, member))) + +#endif + +#endif diff --git a/drivers/gpu/drm/amd/amdxcp/backport/kcl_drm_drv.c b/drivers/gpu/drm/amd/amdxcp/backport/kcl_drm_drv.c new file mode 100644 index 0000000000000..cc1da02e06e84 --- /dev/null +++ b/drivers/gpu/drm/amd/amdxcp/backport/kcl_drm_drv.c @@ -0,0 +1,58 @@ +/* + * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org + * + * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California. + * All Rights Reserved. + * + * Author Rickard E. (Rik) Faith + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include + +#ifdef AMDKCL_DEVM_DRM_DEV_ALLOC +/* Copied from v5.7-rc1-343-gb0b5849e0cc0 drivers/gpu/drm/drm_drv.c and modified for KCL */ +void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver, + size_t size, size_t offset) +{ + void *container; + struct drm_device *drm; + int ret; + + container = kzalloc(size, GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + drm = container + offset; + ret = drm_dev_init(drm, driver, parent); + if (ret) { + drm_dev_put(drm); + return ERR_PTR(ret); + } +#ifdef HAVE_DRM_DRM_MANAGED_H + drmm_add_final_kfree(drm, container); +#endif + drm->dev_private = container; + return container; +} + +#endif diff --git a/drivers/gpu/drm/amd/backport/Makefile b/drivers/gpu/drm/amd/backport/Makefile new file mode 100644 index 0000000000000..2d01094326e2c --- /dev/null +++ b/drivers/gpu/drm/amd/backport/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: MIT +BACKPORT_OBJS := kcl_drm_drv.o kcl_drm_gem_ttm_helper.o \ + kcl_drm_gem.o kcl_drm_file.o kcl_memory.o + +amdgpu-y += $(addprefix ../backport/,$(BACKPORT_OBJS)) + +ccflags-y += \ + -I$(FULL_AMD_PATH) \ + -I$(FULL_AMD_PATH)/backport/include \ + -I$(FULL_AMD_PATH)/dkms \ + -include config/config.h \ + -include backport/backport.h + +ccflags-y += -DHAVE_CONFIG_H diff --git a/drivers/gpu/drm/amd/backport/backport.h b/drivers/gpu/drm/amd/backport/backport.h new file mode 100644 index 0000000000000..013a98f215a07 --- /dev/null +++ b/drivers/gpu/drm/amd/backport/backport.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDGPU_BACKPORT_H +#define AMDGPU_BACKPORT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kcl/kcl_amdgpu_drm_fb_helper.h" +#include "kcl/kcl_amdgpu.h" +#include "kcl/kcl_amdgpu_drm_drv.h" +#include "kcl/kcl_amdgpu_drm_gem.h" +#include "kcl/kcl_drm_gem_ttm_helper.h" +#include "kcl/kcl_mce.h" +#include "kcl/kcl_drm_aperture.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* + * v5.13-rc3-1669-gba6cd766e0bf + * ("drm/plane: Move drm_plane_enable_fb_damage_clips into core") + * move drm_plane_enable_fb_damage_clips() to drm_planer.h. + * include drm_damage_helper.h to fix the missing function declaration for legacy kernel. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* AMDGPU_BACKPORT_H */ diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu.h new file mode 100644 index 0000000000000..33bf87c0568bd --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDGPU_BACKPORT_KCL_AMDGPU_H +#define AMDGPU_BACKPORT_KCL_AMDGPU_H + +#include +#include "amdgpu.h" +#include + +#ifndef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP +static inline u32 kcl_amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int crtc) +{ + struct drm_crtc *drm_crtc = drm_crtc_from_index(dev, crtc); + + return amdgpu_get_vblank_counter_kms(drm_crtc); +} + +static inline int kcl_amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int crtc) +{ + struct drm_crtc *drm_crtc = drm_crtc_from_index(dev, crtc); + + return amdgpu_enable_vblank_kms(drm_crtc); +} + +static inline void kcl_amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int crtc) +{ + struct drm_crtc *drm_crtc = drm_crtc_from_index(dev, crtc); + + return amdgpu_disable_vblank_kms(drm_crtc); +} + +static inline bool kcl_amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + return !!amdgpu_display_get_crtc_scanoutpos(dev, pipe, in_vblank_irq, vpos, hpos, stime, etime, mode); +} + +static inline bool kcl_amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, + int *max_error, ktime_t *vblank_time, + bool in_vblank_irq) +{ + return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, vblank_time, in_vblank_irq); +} +#endif /* HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP */ + +static inline ktime_t kcl_amdgpu_get_vblank_time_ns(struct drm_vblank_crtc *vblank) +{ + return vblank->time; +} + +#endif /* AMDGPU_BACKPORT_KCL_AMDGPU_H */ diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_drv.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_drv.h new file mode 100644 index 0000000000000..926e4cd441519 --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_drv.h @@ -0,0 +1,43 @@ +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * Copyright 2016 Intel Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_BACKPORT_KCL_AMDGPU_DRM_DRV_H__ +#define __AMDGPU_BACKPORT_KCL_AMDGPU_DRM_DRV_H__ + +#include + +/* Copied from v5.7-rc1-343-gb0b5849e0cc0 include/drm/drm_drv.h */ +#ifndef devm_drm_dev_alloc +#define AMDKCL_DEVM_DRM_DEV_ALLOC 1 +void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver, + size_t size, size_t offset); +#define devm_drm_dev_alloc(parent, driver, type, member) \ + ((type *) __devm_drm_dev_alloc(parent, driver, sizeof(type), \ + offsetof(type, member))) + +void amdkcl_drm_dev_release(struct drm_device *ddev); +#endif + +#endif diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_fb_helper.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_fb_helper.h new file mode 100644 index 0000000000000..8c75bfc4e993b --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_fb_helper.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#ifndef AMDGPU_BACKPORT_KCL_AMDGPU_DRM_FB_HELPER_H +#define AMDGPU_BACKPORT_KCL_AMDGPU_DRM_FB_HELPER_H + +#include +#include +#include +#include +#include "amdgpu.h" + +#ifndef HAVE_DRM_FB_HELPER_LASTCLOSE +void drm_fb_helper_lastclose(struct drm_device *dev); +void drm_fb_helper_output_poll_changed(struct drm_device *dev); +#endif + +#endif diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_gem.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_gem.h new file mode 100644 index 0000000000000..9ad60a7646fa0 --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_amdgpu_drm_gem.h @@ -0,0 +1,49 @@ +/* + * GEM Graphics Execution Manager Driver Interfaces + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. 
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * All rights reserved. + * Copyright © 2014 Intel Corporation + * Daniel Vetter + * + * Author: Rickard E. (Rik) Faith + * Author: Gareth Hughes + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_BACKPORT_KCL_AMDGPU_DRM_GEM_H__ +#define __AMDGPU_BACKPORT_KCL_AMDGPU_DRM_GEM_H__ + +int _kcl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +static inline int amdkcl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) { + return _kcl_drm_gem_mmap(filp, vma); +} + + +int _kcl_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + +static inline int amdkcl_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { + return _kcl_drm_gem_prime_mmap(obj, vma); +} + +#endif diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_file.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_file.h new file mode 100644 index 0000000000000..a067b59578b6c --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_file.h @@ -0,0 +1,8 @@ +#ifndef __AMDGPU_BACKPORT_KCL_DRM_DRV_H__ +#define __AMDGPU_BACKPORT_KCL_DRM_DRV_H__ +#include + +#ifndef HAVE_DRM_SHOW_FDINFO +void drm_show_fdinfo(struct seq_file *m, struct file *f); +#endif +#endif diff --git a/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_gem_ttm_helper.h b/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_gem_ttm_helper.h new file mode 100644 index 0000000000000..aa89982b0c2c7 --- /dev/null +++ b/drivers/gpu/drm/amd/backport/include/kcl/kcl_drm_gem_ttm_helper.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _KCL_KCL_DRM_GEM_TTM_HELPER_H_H +#define _KCL_KCL_DRM_GEM_TTM_HELPER_H_H + +#include +#include + +#if !defined(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_2ARGS) +void amdgpu_gem_prime_vunmap(struct drm_gem_object *gem, + void *vaddr); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +#elif !defined(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG) +int _kcl_drm_gem_ttm_vmap(struct drm_gem_object *gem, + struct dma_buf_map *map); +void _kcl_drm_gem_ttm_vunmap(struct drm_gem_object *gem, + struct dma_buf_map *map); +static inline +void amdgpu_drm_gem_ttm_vunmap(struct drm_gem_object *gem, + struct dma_buf_map *map) +{ + _kcl_drm_gem_ttm_vunmap(gem, map); +} + +static inline +int amdgpu_drm_gem_ttm_vmap(struct drm_gem_object *obj, + struct dma_buf_map 
*map) +{ + return _kcl_drm_gem_ttm_vmap(obj, map); +} +#endif + +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +void amdgpu_gem_object_free(struct drm_gem_object *obj); +int amdgpu_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv); +void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv); +#endif + +#endif diff --git a/drivers/gpu/drm/amd/backport/kcl_drm_drv.c b/drivers/gpu/drm/amd/backport/kcl_drm_drv.c new file mode 100644 index 0000000000000..0d243c59b5f6a --- /dev/null +++ b/drivers/gpu/drm/amd/backport/kcl_drm_drv.c @@ -0,0 +1,62 @@ +/* + * Created: Fri Jan 19 10:48:35 2001 by faith@acm.org + * + * Copyright 2001 VA Linux Systems, Inc., Sunnyvale, California. + * All Rights Reserved. + * + * Author Rickard E. (Rik) Faith + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include "amdgpu.h" + +#ifdef AMDKCL_DEVM_DRM_DEV_ALLOC +/* Copied from v5.7-rc1-343-gb0b5849e0cc0 drivers/gpu/drm/drm_drv.c and modified for KCL */ +void *__devm_drm_dev_alloc(struct device *parent, struct drm_driver *driver, + size_t size, size_t offset) +{ + void *container; + struct drm_device *drm; + int ret; + + container = kzalloc(size, GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + drm = container + offset; + ret = drm_dev_init(drm, driver, parent); + if (ret) { + drm_dev_put(drm); + return ERR_PTR(ret); + } +#ifdef HAVE_DRM_DRM_MANAGED_H + drmm_add_final_kfree(drm, container); +#endif + drm->dev_private = container; + return container; +} + +void amdkcl_drm_dev_release(struct drm_device *ddev) +{ + drm_dev_put(ddev); +} + +#endif diff --git a/drivers/gpu/drm/amd/backport/kcl_drm_file.c b/drivers/gpu/drm/amd/backport/kcl_drm_file.c new file mode 100644 index 0000000000000..5bc74f05d555e --- /dev/null +++ b/drivers/gpu/drm/amd/backport/kcl_drm_file.c @@ -0,0 +1,70 @@ +/* + * \author Rickard E. (Rik) Faith + * \author Daryll Strauss + * \author Gareth Hughes + */ + +/* + * Created: Mon Jan 4 08:58:31 1999 by faith@valinux.com + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include "amdgpu_fdinfo.h" +#ifndef HAVE_DRM_SHOW_FDINFO +/** + * drm_show_fdinfo - helper for drm file fops + * @m: output stream + * @f: the device file instance + * + * Helper to implement fdinfo, for userspace to query usage stats, etc, of a + * process using the GPU. See also &drm_driver.show_fdinfo. + * + * For text output format description please see Documentation/gpu/drm-usage-stats.rst + */ +void drm_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct drm_file *file = f->private_data; + struct drm_device *dev = file->minor->dev; + struct drm_printer p = drm_seq_file_printer(m); + + drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); + + if (dev_is_pci(dev->dev)) { + struct pci_dev *pdev = to_pci_dev(dev->dev); + + drm_printf(&p, "drm-pdev:\t%04x:%02x:%02x.%d\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + } + + amdgpu_show_fdinfo(&p, file); +} +#endif diff --git a/drivers/gpu/drm/amd/backport/kcl_drm_gem.c b/drivers/gpu/drm/amd/backport/kcl_drm_gem.c new file mode 100644 index 0000000000000..84ffb99293e93 --- /dev/null +++ b/drivers/gpu/drm/amd/backport/kcl_drm_gem.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#include +#include "amdgpu_ttm.h" +#include "amdgpu_dma_buf.h" + +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +int _kcl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) { + return amdgpu_mmap(filp, vma); +} + +int _kcl_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { + return amdgpu_gem_prime_mmap(obj, vma); +} + +#else +static int _kcl_drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size, + struct vm_area_struct *vma) +{ + int ret; + + /* Check for valid size. */ + if (obj_size < vma->vm_end - vma->vm_start) + return -EINVAL; + + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_get(obj); + + vma->vm_private_data = obj; + vma->vm_ops = obj->funcs->vm_ops; + + if (obj->funcs->mmap) { + ret = obj->funcs->mmap(obj, vma); + if (ret) + goto err_drm_gem_object_put; + WARN_ON(!(vma->vm_flags & VM_DONTEXPAND)); + } else { + if (!vma->vm_ops) { + ret = -EINVAL; + goto err_drm_gem_object_put; + } + + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); + } + + return 0; + +err_drm_gem_object_put: + drm_gem_object_put(obj); + return ret; +} + +int _kcl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) { + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct drm_gem_object *obj = NULL; + struct drm_vma_offset_node *node; + int ret; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (likely(node)) { + obj = container_of(node, struct drm_gem_object, vma_node); + /* + * When the object is being freed, after it hits 0-refcnt it + * proceeds to tear down the object. In the process it will + * attempt to remove the VMA offset and so acquire this + * mgr->vm_lock. Therefore if we find an object with a 0-refcnt + * that matches our range, we know it is in the process of being + * destroyed and will be freed as soon as we release the lock - + * so we have to check for the 0-refcnted object and treat it as + * invalid. 
+ */ + if (!kref_get_unless_zero(&obj->refcount)) + obj = NULL; + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + + if (!obj) + return -EINVAL; + + if (!drm_vma_node_is_allowed(node, priv)) { + drm_gem_object_put(obj); + return -EACCES; + } + +#ifdef HAVE_DRM_VMA_OFFSET_NODE_READONLY_FIELD + if (node->readonly) { + if (vma->vm_flags & VM_WRITE) { + drm_gem_object_put(obj); + return -EINVAL; + } + + vma->vm_flags &= ~VM_MAYWRITE; + } +#endif + + ret = _kcl_drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, + vma); + + drm_gem_object_put(obj); + + return ret; + +} + +int _kcl_drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { + if (obj->funcs && obj->funcs->mmap) { + vma->vm_ops = obj->funcs->vm_ops; + } + return drm_gem_prime_mmap(obj, vma); +} + +#endif diff --git a/drivers/gpu/drm/amd/backport/kcl_drm_gem_ttm_helper.c b/drivers/gpu/drm/amd/backport/kcl_drm_gem_ttm_helper.c new file mode 100644 index 0000000000000..3f3a70274a81c --- /dev/null +++ b/drivers/gpu/drm/amd/backport/kcl_drm_gem_ttm_helper.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include +#include +#include +#include +#include +#include + +#ifndef drm_gem_ttm_of_gem +#define drm_gem_ttm_of_gem(gem_obj) \ + container_of(gem_obj, struct ttm_buffer_object, base) +#endif + +#ifndef HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_2ARGS +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + struct iosys_map map; + + ttm_bo_vmap(bo, &map); + return map.vaddr; +} + +void amdgpu_gem_prime_vunmap(struct drm_gem_object *gem, + void *vaddr) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + struct iosys_map map; + + map.vaddr = vaddr; + map.is_iomem = bo->resource->bus.is_iomem; + + ttm_bo_vunmap(bo, &map); +} +#elif !defined(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG) + +int _kcl_drm_gem_ttm_vmap(struct drm_gem_object *gem, + struct dma_buf_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + struct iosys_map iosys_map; + int r; + + iosys_map.vaddr = map->vaddr; + iosys_map.is_iomem = map->is_iomem; + + r = ttm_bo_vmap(bo, &iosys_map); + + map->vaddr = iosys_map.vaddr; + map->is_iomem = iosys_map.is_iomem; + return r; +} + +void _kcl_drm_gem_ttm_vunmap(struct drm_gem_object *gem, + struct dma_buf_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem); + struct iosys_map iosys_map; + + iosys_map.vaddr = map->vaddr; + iosys_map.is_iomem = map->is_iomem; + + ttm_bo_vunmap(bo, &iosys_map); + + map->vaddr = iosys_map.vaddr; + map->is_iomem = iosys_map.is_iomem; +} +#endif diff --git a/drivers/gpu/drm/amd/backport/kcl_memory.c b/drivers/gpu/drm/amd/backport/kcl_memory.c new file mode 100644 index 0000000000000..153710b6883de --- /dev/null +++ b/drivers/gpu/drm/amd/backport/kcl_memory.c @@ -0,0 +1,20 @@ +#include + +#ifndef HAVE_FOLLOW_PFN +int _kcl_follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn) +{ + int ret = -EINVAL; + spinlock_t *ptl; + pte_t *ptep; + + ret = follow_pte(vma, address, &ptep, &ptl); + if (ret) + return ret; + *pfn = pte_pfn(ptep_get(ptep)); + pte_unmap_unlock(ptep, ptl); + return 0; +} + +EXPORT_SYMBOL(_kcl_follow_pfn); +#endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c old mode 100644 new mode 100755 index 7d999e352df3b..f622eb1551df7 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -79,6 +79,7 @@ #include #include #include + #include #include #include @@ -95,7 +96,6 @@ #include #include #include -#include #include @@ -208,10 +208,11 @@ static void update_subconnector_property(struct amdgpu_dm_connector *aconnector) if (aconnector->dc_sink) subconnector = get_subconnector_type(link); - +#ifdef HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY drm_object_property_set_value(&connector->base, connector->dev->mode_config.dp_subconnector_property, subconnector); +#endif } /* @@ -239,7 +240,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); - +static void prepare_flip_isr(struct amdgpu_crtc *acrtc); static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); @@ -320,18 +321,18 @@ static bool dm_is_idle(void *handle) return true; } -static int dm_wait_for_idle(void *handle) +static int dm_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* XXX todo */ return 0; } -static bool dm_check_soft_reset(void *handle) +static bool dm_check_soft_reset(struct amdgpu_ip_block *ip_block) { return false; } -static int dm_soft_reset(void *handle) +static int dm_soft_reset(struct amdgpu_ip_block *ip_block) { /* XXX todo */ return 0; @@ -527,14 +528,23 @@ static void dm_pflip_high_irq(void *interrupt_params) amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e); } +#ifndef HAVE_KTIME_IS_UNION +static inline ktime_t get_drm_vblank_crtc_time(struct drm_vblank_crtc *vblank) +{ + return kcl_amdgpu_get_vblank_time_ns(vblank); +} +#endif + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; struct amdgpu_device *adev = irq_params->adev; struct amdgpu_crtc *acrtc; struct drm_device *drm_dev; +#ifndef HAVE_KTIME_IS_UNION struct drm_vblank_crtc *vblank; ktime_t frame_duration_ns, previous_timestamp; +#endif unsigned long flags; int vrr_active; @@ -543,16 +553,19 @@ static void dm_vupdate_high_irq(void *interrupt_params) if (acrtc) { vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc); drm_dev = acrtc->base.dev; +#ifndef HAVE_KTIME_IS_UNION vblank = drm_crtc_vblank_crtc(&acrtc->base); previous_timestamp = atomic64_read(&irq_params->previous_timestamp); - frame_duration_ns = vblank->time - previous_timestamp; + frame_duration_ns = get_drm_vblank_crtc_time(vblank) - previous_timestamp; if (frame_duration_ns > 0) { trace_amdgpu_refresh_rate_track(acrtc->base.index, frame_duration_ns, ktime_divns(NSEC_PER_SEC, frame_duration_ns)); - atomic64_set(&irq_params->previous_timestamp, vblank->time); + atomic64_set(&irq_params->previous_timestamp, + get_drm_vblank_crtc_time(vblank)); } +#endif drm_dbg_vbl(drm_dev, "crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id, @@ -770,10 +783,15 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; } + /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. 
*/ + if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { + DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + return; + } + link_index = notify->link_index; link = adev->dm.dc->links[link_index]; dev = adev->dm.ddev; - drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -795,6 +813,7 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, } } drm_connector_list_iter_end(&iter); + drm_modeset_unlock(&dev->mode_config.connection_mutex); if (hpd_aconnector) { if (notify->type == DMUB_NOTIFICATION_HPD) { @@ -807,6 +826,20 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, } } +/** + * dmub_hpd_sense_callback - DMUB HPD sense processing callback. + * @adev: amdgpu_device pointer + * @notify: dmub notification structure + * + * HPD sense changes can occur during low power states and need to be + * notified from firmware to driver. + */ +static void dmub_hpd_sense_callback(struct amdgpu_device *adev, + struct dmub_notification *notify) +{ + DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n"); +} + /** * register_dmub_notify_callback - Sets callback for DMUB notify * @adev: amdgpu_device pointer @@ -948,7 +981,7 @@ static int dm_set_powergating_state(void *handle, } /* Prototypes of private functions */ -static int dm_early_init(void *handle); +static int dm_early_init(struct amdgpu_ip_block *ip_block); /* Allocate memory for FBC compressed data */ static void amdgpu_dm_fbc_init(struct drm_connector *connector) @@ -1007,7 +1040,6 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port, drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -1407,7 +1439,7 @@ static void force_connector_state( mutex_unlock(&connector->dev->mode_config.mutex); mutex_lock(&aconnector->hpd_lock); - drm_kms_helper_connector_hotplug_event(connector); + drm_kms_helper_connector_hotplug_event(connector); mutex_unlock(&aconnector->hpd_lock); } @@ -1676,6 +1708,26 @@ dm_allocate_gpu_mem( return da->cpu_ptr; } +void +dm_free_gpu_mem( + struct amdgpu_device *adev, + enum dc_gpu_mem_alloc_type type, + void *pvMem) +{ + struct dal_allocation *da; + + /* walk the da list in DM */ + list_for_each_entry(da, &adev->dm.da_list, list) { + if (pvMem == da->cpu_ptr) { + amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); + list_del(&da->list); + kfree(da); + break; + } + } + +} + static enum dmub_status dm_dmub_send_vbios_gpint_command(struct amdgpu_device *adev, enum dmub_gpint_command command_code, @@ -1742,16 +1794,60 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * /* Send the chunk */ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000); if (ret != DMUB_STATUS_OK) - /* No need to free bb here since it shall be done in dm_sw_fini() */ - return NULL; + goto free_bb; } /* Now ask DMUB to copy the bb */ ret = dm_dmub_send_vbios_gpint_command(adev, DMUB_GPINT__BB_COPY, 1, 200000); if (ret != DMUB_STATUS_OK) - return NULL; + goto free_bb; return bb; + +free_bb: + dm_free_gpu_mem(adev, DC_MEM_ALLOC_TYPE_GART, (void *) bb); + return NULL; + +} + +static enum dmub_ips_disable_type dm_get_default_ips_mode( + struct amdgpu_device *adev) +{ + enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE; + + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 5, 0): + /* + * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to + * cause a hard 
hang. A fix exists for newer PMFW. + * + * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest + * IPS state in all cases, except for s0ix and all displays off (DPMS), + * where IPS2 is allowed. + * + * When checking the PMFW version, use only the major and minor fields. + */ + if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) + /* + * Other DCN35 ASICs have residency issues with + * IPS2 in idle; they should use IPS2 only in + * display-off cases. + */ + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + case IP_VERSION(3, 5, 1): + ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + break; + default: + /* ASICs older than DCN35 do not have IPS */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0)) + ret = DMUB_IPS_DISABLE_ALL; + break; + } + + return ret; } static int amdgpu_dm_init(struct amdgpu_device *adev) @@ -1769,6 +1865,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) mutex_init(&adev->dm.dpia_aux_lock); mutex_init(&adev->dm.dc_lock); + mutex_init(&adev->dm.audio_lock); if (amdgpu_dm_irq_init(adev)) { @@ -1864,8 +1961,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_DISABLE_IPS) init_data.flags.disable_ips = DMUB_IPS_DISABLE_ALL; - else + else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS_DYNAMIC) + init_data.flags.disable_ips = DMUB_IPS_DISABLE_DYNAMIC; + else if (amdgpu_dc_debug_mask & DC_DISABLE_IPS2_DYNAMIC) + init_data.flags.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; + else if (amdgpu_dc_debug_mask & DC_FORCE_IPS_ENABLE) init_data.flags.disable_ips = DMUB_IPS_ENABLE; + else + init_data.flags.disable_ips = dm_get_default_ips_mode(adev); init_data.flags.disable_ips_in_vpb = 0; @@ -1923,6 +2026,25 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ adev->dm.dc->debug.ignore_cable_id = true; +#ifndef HAVE_DRM_DP_REMOVE_PAYLOAD_PART + /* TODO: A recent drm mst change revoked the driver's freedom to update + * vc_next_start_slot and moved it into drm, so the drm function must + * update vc_next_start_slot on every call, whether or not mst_state is + * active. Otherwise the next hotplug will report a wrong start_slot + * number. As a temporary solution we also notify drm of mst + * deallocation when the link is no longer of MST type while + * uncommitting the stream, buying time to work on a proper solution. + * Ideally, when the dm_helpers_dp_mst_stop_top_mgr message is + * triggered, we should notify drm to do a complete "reset" of its + * states and stop calling further drm mst functions once the link is + * no longer of MST type, which can happen when an MST hub or display + * is unplugged. When the stream is uncommitted later, after the + * unplug, we should reset hardware states only. + */ + adev->dm.dc->debug.temp_mst_deallocation_sequence = true; +#endif //HAVE_DRM_DP_REMOVE_PAYLOAD_PART +
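For reference, the PMFW gate above compares only the major and minor fields of the packed firmware version, masking off the low revision byte so point releases of the same major.minor are treated alike. A standalone C sketch of that comparison; the 0x00MMmmrr layout is inferred from the mask used in the patch:

#include <stdio.h>
#include <stdint.h>

/* Keep the major/minor bytes, drop the revision byte, then compare. */
static int pmfw_older_than(uint32_t fw_version, uint32_t cutoff)
{
	return (fw_version & 0x00FFFF00u) < (cutoff & 0x00FFFF00u);
}

int main(void)
{
	uint32_t fw = 0x005D62FF;  /* major 0x5D, minor 0x62, any revision */

	printf("needs workaround: %d\n", pmfw_older_than(fw, 0x005D6300));
	return 0;
}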
if (adev->dm.dc->caps.dp_hdmi21_pcon_support) DRM_INFO("DP-HDMI FRL PCON supported\n"); @@ -1966,7 +2088,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); } - if (adev->dm.dc->caps.ips_support && adev->dm.dc->config.disable_ips == DMUB_IPS_ENABLE) + if (adev->dm.dc->caps.ips_support && + adev->dm.dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) adev->dm.idle_workqueue = idle_create_workqueue(adev); if (adev->dm.dc->caps.max_links > 0 && adev->family >= AMDGPU_FAMILY_RV) { @@ -2048,9 +2171,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) return -EINVAL; } -static int amdgpu_dm_early_fini(void *handle) +static int amdgpu_dm_early_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_dm_audio_fini(adev); @@ -2442,9 +2565,9 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) return 0; } -static int dm_sw_init(void *handle) +static int dm_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; adev->dm.cgs_device = amdgpu_cgs_create_device(adev); @@ -2464,9 +2587,9 @@ static int dm_sw_init(void *handle) return load_dmcu_fw(adev); } -static int dm_sw_fini(void *handle) +static int dm_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct dal_allocation *da; list_for_each_entry(da, &adev->dm.da_list, list) { @@ -2474,11 +2597,11 @@ static int dm_sw_fini(void *handle) amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); list_del(&da->list); kfree(da); + adev->dm.bb_from_dmub = NULL; break; } } - adev->dm.bb_from_dmub = NULL; kfree(adev->dm.dmub_fb_info); adev->dm.dmub_fb_info = NULL; @@ -2504,7 +2627,6 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -2531,9 +2653,9 @@ static int detect_mst_link_for_all_connectors(struct drm_device *dev) return ret; } -static int dm_late_init(void *handle) +static int dm_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct dmcu_iram_parameters params; unsigned int linear_lut[16]; @@ -2579,9 +2701,9 @@ static int dm_late_init(void *handle) static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) { + u8 buf[UUID_SIZE]; + guid_t guid; int ret; - u8 guid[16]; - u64 tmp64; mutex_lock(&mgr->lock); if (!mgr->mst_primary) @@ -2602,26 +2724,31 @@ static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) } /* Some hubs forget their guids after they resume */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); - if (ret != 16) { + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf)); + if (ret != sizeof(buf)) { drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); goto out_fail; } - if (memchr_inv(guid, 0, 16) == NULL) { - tmp64 = get_jiffies_64(); - memcpy(&guid[0], &tmp64, sizeof(u64)); - memcpy(&guid[8], &tmp64, sizeof(u64)); + import_guid(&guid, buf); - ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid,
16); + if (guid_is_null(&guid)) { + guid_gen(&guid); + export_guid(buf, &guid); - if (ret != 16) { + ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, buf, sizeof(buf)); + + if (ret != sizeof(buf)) { drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n"); goto out_fail; } } - memcpy(mgr->mst_primary->guid, guid, 16); +#ifdef HAVE_DRM_DP_MST_BRANCH_GUID_T + guid_copy(&mgr->mst_primary->guid, &guid); +#else + memcpy(mgr->mst_primary->guid, &guid, 16); +#endif out_fail: mutex_unlock(&mgr->lock); @@ -2636,7 +2763,6 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -2667,6 +2793,7 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) } } drm_connector_list_iter_end(&iter); + } static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) @@ -2740,9 +2867,9 @@ static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev) * - Vblank support * - Debug FS entries, if enabled */ -static int dm_hw_init(void *handle) +static int dm_hw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int r; /* Create DAL display manager */ @@ -2762,9 +2889,9 @@ static int dm_hw_init(void *handle) * cleanup. This involves cleaning up the DRM device, DC, and any modules that * were loaded. Also flush IRQ workqueues and disable them. */ -static int dm_hw_fini(void *handle) +static int dm_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; amdgpu_dm_hpd_fini(adev); @@ -2868,9 +2995,9 @@ static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm) } } -static int dm_suspend(void *handle) +static int dm_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_display_manager *dm = &adev->dm; int ret = 0; @@ -3056,9 +3183,9 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, kfree(bundle); } -static int dm_resume(void *handle) +static int dm_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct drm_device *ddev = adev_to_drm(adev); struct amdgpu_display_manager *dm = &adev->dm; struct amdgpu_dm_connector *aconnector; @@ -3176,7 +3303,6 @@ static int dm_resume(void *handle) /* Do detection*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -3322,7 +3448,6 @@ const struct amdgpu_ip_block_version dm_ip_block = { .funcs = &amdgpu_dm_funcs, }; - /** * DOC: atomic * @@ -3331,22 +3456,29 @@ const struct amdgpu_ip_block_version dm_ip_block = { static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED .get_format_info = amdgpu_dm_plane_get_format_info, +#endif .atomic_check = amdgpu_dm_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { .atomic_commit_tail = amdgpu_dm_atomic_commit_tail, +#ifdef HAVE_DRM_DP_ATOMIC_SETUP_COMMIT .atomic_commit_setup = drm_dp_mst_atomic_setup_commit, +#endif }; 
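The resume_mst_branch_status() hunk above stops hand-rolling hub GUIDs from jiffies and instead goes through the kernel's typed guid_t helpers: import the raw DPCD bytes, test for the all-zero GUID, and only then generate a fresh one and write it back. A self-contained userspace sketch of that pattern; the miniature guid_t and helpers below are stand-ins for the linux/uuid.h API (import_guid(), export_guid(), guid_is_null(), guid_gen()), not the kernel implementations:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    #define UUID_SIZE 16

    typedef struct { unsigned char b[UUID_SIZE]; } guid_t;

    /* Stand-ins for the kernel helpers of the same names. */
    static void import_guid(guid_t *dst, const unsigned char *src)
    {
        memcpy(dst->b, src, UUID_SIZE);
    }

    static void export_guid(unsigned char *dst, const guid_t *src)
    {
        memcpy(dst, src->b, UUID_SIZE);
    }

    static int guid_is_null(const guid_t *g)
    {
        static const guid_t null_guid;

        return memcmp(g, &null_guid, sizeof(null_guid)) == 0;
    }

    static void guid_gen(guid_t *g)
    {
        for (int i = 0; i < UUID_SIZE; i++)
            g->b[i] = rand() & 0xff;
    }

    int main(void)
    {
        unsigned char buf[UUID_SIZE] = { 0 }; /* as if read from DP_GUID */
        guid_t guid;

        srand((unsigned int)time(NULL));
        import_guid(&guid, buf);
        if (guid_is_null(&guid)) {   /* hub forgot its GUID over suspend */
            guid_gen(&guid);
            export_guid(buf, &guid); /* would be written back via DPCD */
        }
        for (int i = 0; i < UUID_SIZE; i++)
            printf("%02x", buf[i]);
        printf("\n");
        return 0;
    }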
+#ifdef HAVE_HDR_SINK_METADATA static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { struct amdgpu_dm_backlight_caps *caps; struct drm_connector *conn_base; struct amdgpu_device *adev; +#ifdef HAVE_DRM_DISPLAY_INFO_LUMINANCE_RANGE struct drm_luminance_range_info *luminance_range; +#endif if (aconnector->bl_idx == -1 || aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP) @@ -3372,6 +3504,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) else if (amdgpu_backlight == 1) caps->aux_support = true; +#ifdef HAVE_DRM_DISPLAY_INFO_LUMINANCE_RANGE luminance_range = &conn_base->display_info.luminance_range; if (luminance_range->max_luminance) { @@ -3381,7 +3514,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->aux_min_input_signal = 0; caps->aux_max_input_signal = 512; } +#endif } +#endif void amdgpu_dm_update_connector_after_detect( struct amdgpu_dm_connector *aconnector) @@ -3509,7 +3644,9 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); amdgpu_dm_update_freesync_caps(connector, aconnector->edid); +#ifdef HAVE_HDR_SINK_METADATA update_connector_ext_caps(aconnector); +#endif } else { drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); amdgpu_dm_update_freesync_caps(connector, NULL); @@ -3777,6 +3914,12 @@ static int register_hpd_handlers(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub hpd callback"); return -EINVAL; } + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, + dmub_hpd_sense_callback, true)) { + DRM_ERROR("amdgpu: fail to register dmub hpd sense callback"); + return -EINVAL; + } } list_for_each_entry(connector, @@ -4386,7 +4529,10 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return -ENOMEM; } - drm_atomic_private_obj_init(adev_to_drm(adev), + drm_atomic_private_obj_init( +#ifdef HAVE_DRM_ATOMIC_PRIVATE_OBJ_INIT_4ARGS + adev_to_drm(adev), +#endif &adev->dm.atomic_obj, &state->base, &dm_atomic_state_funcs); @@ -4418,7 +4564,10 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 +#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2) +#ifdef HAVE_HDR_SINK_METADATA #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 +#endif static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, int bl_idx) @@ -4432,10 +4581,27 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, return; amdgpu_acpi_get_backlight_caps(&caps); + + /* validate the firmware value is sane */ + if (caps.caps_valid) { + int spread = caps.max_input_signal - caps.min_input_signal; + + if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + caps.min_input_signal < 0 || + spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT || + spread < AMDGPU_DM_MIN_SPREAD) { + DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n", + caps.min_input_signal, caps.max_input_signal); + caps.caps_valid = false; + } + } + if (caps.caps_valid) { dm->backlight_caps[bl_idx].caps_valid = true; +#ifdef HAVE_HDR_SINK_METADATA if (caps.aux_support) return; +#endif dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal; dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal; } else { @@ -4445,14 +4611,17 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, 
AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; } #else +#ifdef HAVE_HDR_SINK_METADATA if (dm->backlight_caps[bl_idx].aux_support) return; +#endif dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; #endif } +#ifdef HAVE_HDR_SINK_METADATA static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, unsigned int *min, unsigned int *max) { @@ -4498,19 +4667,25 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), max - min); } +#endif static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, int bl_idx, u32 user_brightness) { struct amdgpu_dm_backlight_caps caps; +#ifdef HAVE_HDR_SINK_METADATA struct dc_link *link; u32 brightness; - bool rc; +#else + uint32_t brightness = user_brightness; +#endif + bool rc, reallow_idle = false; amdgpu_dm_update_backlight_caps(dm, bl_idx); caps = dm->backlight_caps[bl_idx]; +#ifdef HAVE_HDR_SINK_METADATA dm->brightness[bl_idx] = user_brightness; /* update scratch register */ if (bl_idx == 0) @@ -4519,6 +4694,12 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, link = (struct dc_link *)dm->backlight_link[bl_idx]; /* Change brightness based on AUX property */ + mutex_lock(&dm->dc_lock); + if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) { + dc_allow_idle_optimizations(dm->dc, false); + reallow_idle = true; + } + if (caps.aux_support) { rc = dc_link_set_backlight_level_nits(link, true, brightness, AUX_BL_DEFAULT_TRANSITION_TIME_MS); @@ -4530,8 +4711,37 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } + if (dm->dc->caps.ips_support && reallow_idle) + dc_allow_idle_optimizations(dm->dc, true); + + mutex_unlock(&dm->dc_lock); + if (rc) dm->actual_brightness[bl_idx] = user_brightness; +#else + /* + * The brightness input is in the range 0-255 + * It needs to be rescaled to be between the + * requested min and max input signal + * + * It also needs to be scaled up by 0x101 to + * match the DC interface which has a range of + * 0 to 0xffff + */ + brightness = + brightness + * 0x101 + * (caps.max_input_signal - caps.min_input_signal) + / AMDGPU_MAX_BL_LEVEL + + caps.min_input_signal * 0x101; + + rc = dc_link_set_backlight_level(dm->backlight_link[bl_idx], brightness, 0); + + if (!rc) + DRM_ERROR("DM: Failed to update backlight on eDP[%d]\n", bl_idx); + if (rc) + dm->actual_brightness[bl_idx] = user_brightness; +#endif } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) @@ -4560,6 +4770,7 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, amdgpu_dm_update_backlight_caps(dm, bl_idx); caps = dm->backlight_caps[bl_idx]; +#ifdef HAVE_HDR_SINK_METADATA if (caps.aux_support) { u32 avg, peak; bool rc; @@ -4569,13 +4780,18 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, avg); } +#endif ret = dc_link_get_backlight_level(link); if (ret == DC_ERROR_UNEXPECTED) return dm->brightness[bl_idx]; +#ifdef HAVE_HDR_SINK_METADATA return convert_brightness_to_user(&caps, ret); +#else + return ret; +#endif } static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) @@ -4610,12 +4826,14 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector 
*aconnector) if (aconnector->bl_idx == -1) return; +#ifdef HAVE_ACPI_VIDEO_BACKLIGHT_USE_NATIVE if (!acpi_video_backlight_use_native()) { drm_info(drm, "Skipping amdgpu DM backlight registration\n"); /* Try registering an ACPI video backlight device instead. */ acpi_video_register_backlight(); return; } +#endif amdgpu_acpi_get_backlight_caps(&caps); if (caps.caps_valid) { @@ -4705,8 +4923,9 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL; dm->backlight_link[bl_idx] = link; dm->num_of_edps++; - +#ifdef HAVE_HDR_SINK_METADATA update_connector_ext_caps(aconnector); +#endif } static void amdgpu_set_panel_orientation(struct drm_connector *connector); @@ -4947,12 +5166,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (psr_feature_enabled) amdgpu_dm_set_psr_caps(link); - - /* TODO: Fix vblank control helpers to delay PSR entry to allow this when - * PSR is also supported. - */ - if (link->psr_settings.psr_feature_enabled) - adev_to_drm(adev)->vblank_disable_immediate = false; } } amdgpu_set_panel_orientation(&aconnector->base); @@ -5037,6 +5250,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { +#ifdef AMDKCL_DEVM_DRM_DEV_ALLOC + drm_mode_config_cleanup(dm->ddev); +#endif + drm_atomic_private_obj_fini(&dm->atomic_obj); } @@ -5081,15 +5298,20 @@ static ssize_t s3_debug_store(struct device *device, int s3_state; struct drm_device *drm_dev = dev_get_drvdata(device); struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_ip_block *ip_block; + + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE); + if (!ip_block) + return -EINVAL; ret = kstrtoint(buf, 0, &s3_state); if (ret == 0) { if (s3_state) { - dm_resume(adev); + dm_resume(ip_block); drm_kms_helper_hotplug_event(adev_to_drm(adev)); } else - dm_suspend(adev); + dm_suspend(ip_block); } return ret == 0 ? 
count : 0; @@ -5161,9 +5383,9 @@ static int dm_init_microcode(struct amdgpu_device *adev) return r; } -static int dm_early_init(void *handle) +static int dm_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_mode_info *mode_info = &adev->mode_info; struct atom_context *ctx = mode_info->atom_context; int index = GetIndexIntoMasterTable(DATA, Object_Header); @@ -5413,9 +5635,11 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, case DRM_FORMAT_NV12: plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; break; +#ifdef DRM_FORMAT_P010 case DRM_FORMAT_P010: plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; break; +#endif case DRM_FORMAT_XRGB16161616F: case DRM_FORMAT_ARGB16161616F: plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; @@ -5424,6 +5648,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, case DRM_FORMAT_ABGR16161616F: plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F; break; +#ifdef DRM_FORMAT_XRGB16161616 case DRM_FORMAT_XRGB16161616: case DRM_FORMAT_ARGB16161616: plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616; @@ -5432,6 +5657,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, case DRM_FORMAT_ABGR16161616: plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; break; +#endif default: DRM_ERROR( "Unsupported screen format %p4cc\n", @@ -5457,7 +5683,6 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, break; } - plane_info->visible = true; plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; @@ -5767,6 +5992,10 @@ convert_color_depth_from_display_info(const struct drm_connector *connector, { u8 bpc; + bpc = (uint8_t)connector->display_info.bpc; + /* Assume 8 bpc by default if no bpc is specified. */ + bpc = bpc ? bpc : 8; + if (is_y420) { bpc = 8; @@ -5777,10 +6006,6 @@ convert_color_depth_from_display_info(const struct drm_connector *connector, bpc = 12; else if (connector->display_info.hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_30) bpc = 10; - } else { - bpc = (uint8_t)connector->display_info.bpc; - /* Assume 8 bpc by default if no bpc is specified. */ - bpc = bpc ? 
bpc : 8; } if (requested_bpc > 0) { @@ -5830,6 +6055,7 @@ get_aspect_ratio(const struct drm_display_mode *mode_in) return (enum dc_aspect_ratio) mode_in->picture_aspect_ratio; } +#ifdef HAVE_DRM_CONNECTOR_STATE_COLORSPACE static enum dc_color_space get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing, const struct drm_connector_state *connector_state) @@ -5886,6 +6112,51 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing, return color_space; } +#else +static enum dc_color_space +get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing, + const struct drm_connector_state *connector_state) +{ + enum dc_color_space color_space = COLOR_SPACE_SRGB; + + switch (dc_crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + case PIXEL_ENCODING_YCBCR444: + case PIXEL_ENCODING_YCBCR420: + { + /* + * 27030khz is the separation point between HDTV and SDTV + * according to HDMI spec, we use YCbCr709 and YCbCr601 + * respectively + */ + if (dc_crtc_timing->pix_clk_100hz > 270300) { + if (dc_crtc_timing->flags.Y_ONLY) + color_space = + COLOR_SPACE_YCBCR709_LIMITED; + else + color_space = COLOR_SPACE_YCBCR709; + } else { + if (dc_crtc_timing->flags.Y_ONLY) + color_space = + COLOR_SPACE_YCBCR601_LIMITED; + else + color_space = COLOR_SPACE_YCBCR601; + } + + } + break; + case PIXEL_ENCODING_RGB: + color_space = COLOR_SPACE_SRGB; + break; + + default: + WARN_ON(1); + break; + } + + return color_space; +} +#endif static enum display_content_type get_output_content_type(const struct drm_connector_state *connector_state) @@ -6001,7 +6272,13 @@ static void fill_stream_properties_from_drm_display_mode( } if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { +#if defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P) drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, (struct drm_connector *)connector, mode_in); +#elif defined(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B) + drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, mode_in, false); +#else + drm_hdmi_avi_infoframe_from_display_mode(&avi_frame, mode_in); +#endif /* HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P */ timing_out->vic = avi_frame.video_code; drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); timing_out->hdmi_vic = hv_frame.vic; @@ -6204,7 +6481,6 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context) for (i = 0; i < context->stream_count ; i++) { stream = context->streams[i]; - if (!stream) continue; @@ -6275,7 +6551,6 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, return NULL; } } - highest_refresh = drm_mode_vrefresh(m_pref); /* @@ -6298,31 +6573,64 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, return m_pref; } +/* Standard FPS values + * + * 23.976 - TV/NTSC + * 24 - Cinema + * 25 - TV/PAL + * 29.97 - TV/NTSC + * 30 - TV/NTSC + * 48 - Cinema HFR + * 50 - TV/PAL + * 60 - Commonly used + * 48,72,96 - Multiples of 24 + */ +const uint32_t common_rates[] = { 23976, 24000, 25000, 29970, 30000, + 48000, 50000, 60000, 72000, 96000 }; + + static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector) { struct drm_display_mode *high_mode; int timing_diff; + int i; high_mode = get_highest_refresh_rate_mode(aconnector, false); if (!high_mode || !mode) return false; - timing_diff = high_mode->vtotal - mode->vtotal; - if (high_mode->clock == 0 || high_mode->clock != mode->clock || - high_mode->hdisplay != mode->hdisplay || 
- high_mode->vdisplay != mode->vdisplay || - high_mode->hsync_start != mode->hsync_start || - high_mode->hsync_end != mode->hsync_end || + high_mode->hdisplay != mode->hdisplay || + high_mode->vdisplay != mode->vdisplay || + high_mode->hsync_start != mode->hsync_start || + high_mode->hsync_end != mode->hsync_end || high_mode->htotal != mode->htotal || high_mode->hskew != mode->hskew || high_mode->vscan != mode->vscan || high_mode->vsync_start - mode->vsync_start != timing_diff || high_mode->vsync_end - mode->vsync_end != timing_diff) return false; - else - return true; + + for (i = 0; i < ARRAY_SIZE(common_rates); i++) { + uint64_t target_vtotal, target_vtotal_diff; + uint64_t num, den; + + if (drm_mode_vrefresh(high_mode) * 1000 < common_rates[i]) + continue; + if (common_rates[i] < aconnector->min_vfreq * 1000 || + common_rates[i] > aconnector->max_vfreq * 1000) + continue; + num = (unsigned long long)high_mode->clock * 1000 * 1000; + den = common_rates[i] * (unsigned long long)high_mode->htotal; + target_vtotal = div_u64(num, den); + target_vtotal_diff = target_vtotal - high_mode->vtotal; + + if ((mode->vtotal - target_vtotal_diff) == high_mode->vtotal) + return true; + } + + return false; } #if defined(CONFIG_DRM_AMD_DC_FP) @@ -6416,8 +6724,15 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, struct dc *dc = sink->ctx->dc; u32 max_supported_bw_in_kbps, timing_bw_in_kbps; u32 dsc_max_supported_bw_in_kbps; +#ifdef HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP u32 max_dsc_target_bpp_limit_override = drm_connector->display_info.max_dsc_bpp; +#else + u32 max_dsc_target_bpp_limit_override = 0; + if (stream->link && stream->link->local_sink) + max_dsc_target_bpp_limit_override = + stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit; +#endif struct dc_dsc_config_options dsc_options = {0}; dc_dsc_get_default_config_option(dc, &dsc_options); @@ -6446,7 +6761,8 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); + DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from SST RX\n", + __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing, @@ -6465,7 +6781,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, dc_link_get_highest_encoding_format(aconnector->dc_link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from DP-HDMI PCON\n", + DRM_DEBUG_DRIVER("%s: SST_DSC [%s] DSC is selected from DP-HDMI PCON\n", __func__, drm_connector->name); } } @@ -6542,7 +6858,6 @@ create_stream_for_sink(struct drm_connector *connector, sink = aconnector->dc_sink; dc_sink_retain(sink); } - stream = dc_create_stream_for_sink(sink); if (stream == NULL) { @@ -6630,7 +6945,6 @@ create_stream_for_sink(struct drm_connector *connector, if (aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE && dsc_caps.is_dsc_supported) apply_dsc_policy_for_stream(aconnector, sink, stream, &dsc_caps); #endif - update_stream_scaling_settings(&mode, dm_state, stream); fill_audio_info( @@ -6646,12 +6960,21 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == 
SIGNAL_TYPE_DISPLAY_PORT_MST || stream->signal == SIGNAL_TYPE_EDP) { + const struct dc_edid_caps *edid_caps; + unsigned int disable_colorimetry = 0; + + if (aconnector->dc_sink) { + edid_caps = &aconnector->dc_sink->edid_caps; + disable_colorimetry = edid_caps->panel_patch.disable_colorimetry; + } + // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet // stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && - stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED; + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED && + !disable_colorimetry; if (stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; @@ -6862,6 +7185,7 @@ amdgpu_dm_should_create_sysfs(struct amdgpu_dm_connector *amdgpu_dm_connector) if (amdgpu_dm_connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) return false; +#ifdef HAVE_HDR_SINK_METADATA /* check for OLED panels */ if (amdgpu_dm_connector->bl_idx >= 0) { struct drm_device *drm = amdgpu_dm_connector->base.dev; @@ -6872,6 +7196,7 @@ amdgpu_dm_should_create_sysfs(struct amdgpu_dm_connector *amdgpu_dm_connector) if (caps->aux_support) return false; } +#endif return true; } @@ -6885,7 +7210,6 @@ static void amdgpu_dm_connector_unregister(struct drm_connector *connector) drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux); } - static void amdgpu_dm_connector_destroy(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -6919,7 +7243,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) kfree(aconnector->i2c); } kfree(aconnector->dm_dp_aux.aux.name); - kfree(connector); } @@ -6941,8 +7264,10 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->underscan_hborder = 0; state->underscan_vborder = 0; state->base.max_requested_bpc = 8; +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) state->vcpi_slots = 0; state->pbn = 0; +#endif if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { if (amdgpu_dm_abm_level <= 0) @@ -6975,8 +7300,10 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) new_state->underscan_enable = state->underscan_enable; new_state->underscan_hborder = state->underscan_hborder; new_state->underscan_vborder = state->underscan_vborder; +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) new_state->vcpi_slots = state->vcpi_slots; new_state->pbn = state->pbn; +#endif return &new_state->base; } @@ -7011,6 +7338,7 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector) return 0; } +#ifdef HAVE_DRM_CONNECTOR_EDID_OVERRIDE static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -7048,6 +7376,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) &dc_em_sink->edid_caps); } } +#endif static const struct drm_connector_funcs amdgpu_dm_connector_funcs = { .reset = amdgpu_dm_connector_funcs_reset, @@ -7060,7 +7389,9 @@ static const struct drm_connector_funcs amdgpu_dm_connector_funcs = { .atomic_get_property = amdgpu_dm_connector_atomic_get_property, .late_register = amdgpu_dm_connector_late_register, .early_unregister = amdgpu_dm_connector_unregister, +#ifdef HAVE_DRM_CONNECTOR_EDID_OVERRIDE .force = amdgpu_dm_connector_funcs_force +#endif }; static int get_modes(struct 
drm_connector *connector) @@ -7301,6 +7632,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec return result; } +#ifdef HDMI_DRM_INFOFRAME_SIZE static int fill_hdr_info_packet(const struct drm_connector_state *state, struct dc_info_packet *out) { @@ -7362,23 +7694,33 @@ static int fill_hdr_info_packet(const struct drm_connector_state *state, static int amdgpu_dm_connector_atomic_check(struct drm_connector *conn, +#ifdef HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE struct drm_atomic_state *state) { struct drm_connector_state *new_con_state = drm_atomic_get_new_connector_state(state, conn); +#else + struct drm_connector_state *new_con_state) +{ + struct drm_atomic_state *state = new_con_state->state; +#endif struct drm_connector_state *old_con_state = drm_atomic_get_old_connector_state(state, conn); struct drm_crtc *crtc = new_con_state->crtc; struct drm_crtc_state *new_crtc_state; +#ifdef HAVE_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); +#endif int ret; trace_amdgpu_dm_connector_atomic_check(new_con_state); if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { +#ifdef HAVE_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK ret = drm_dp_mst_root_conn_atomic_check(new_con_state, &aconn->mst_mgr); if (ret < 0) return ret; +#endif } if (!crtc) @@ -7429,6 +7771,22 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, return 0; } +#endif + +static struct drm_encoder *amdgpu_dm_connector_to_encoder(struct drm_connector *connector) +{ +#ifdef HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS + struct drm_encoder *encoder; + + /* There is only one encoder per connector */ + drm_connector_for_each_possible_encoder(connector, encoder) + return encoder; + + return NULL; +#else + return drm_encoder_find(connector->dev, NULL, connector->encoder_ids[0]); +#endif +} static const struct drm_connector_helper_funcs amdgpu_dm_connector_helper_funcs = { @@ -7440,7 +7798,10 @@ amdgpu_dm_connector_helper_funcs = { */ .get_modes = get_modes, .mode_valid = amdgpu_dm_connector_mode_valid, +#ifdef HDMI_DRM_INFOFRAME_SIZE .atomic_check = amdgpu_dm_connector_atomic_check, +#endif + .best_encoder = amdgpu_dm_connector_to_encoder }; static void dm_encoder_helper_disable(struct drm_encoder *encoder) @@ -7473,6 +7834,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) struct drm_atomic_state *state = crtc_state->state; struct drm_connector *connector = conn_state->connector; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -7480,7 +7842,9 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, const struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT) || defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION) struct drm_dp_mst_topology_state *mst_state; +#endif enum dc_color_depth color_depth; int clock, bpp = 0; bool is_y420 = false; @@ -7494,11 +7858,16 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (!crtc_state->connectors_changed && !crtc_state->mode_changed) return 0; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT) || defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION) mst_state = 
drm_atomic_get_mst_topology_state(state, mst_mgr); if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); +#else + mst_state->pbn_div = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); +#endif +#endif if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; @@ -7510,16 +7879,21 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, max_bpc); bpp = convert_dc_color_depth_into_bpc(color_depth) * 3; clock = adjusted_mode->clock; - dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp << 4); + dm_new_connector_state->pbn = drm_dp_calc_pbn_mode(clock, bpp, false); } dm_new_connector_state->vcpi_slots = drm_dp_atomic_find_time_slots(state, mst_mgr, mst_port, - dm_new_connector_state->pbn); + dm_new_connector_state->pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(aconnector->dc_link) +#endif + ); if (dm_new_connector_state->vcpi_slots < 0) { DRM_DEBUG_ATOMIC("failed finding vcpi slots: %d\n", (int)dm_new_connector_state->vcpi_slots); return dm_new_connector_state->vcpi_slots; } +#endif return 0; } @@ -7528,6 +7902,7 @@ const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs = { .atomic_check = dm_encoder_helper_atomic_check }; +#if defined(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC) static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, struct dc_state *dc_state, struct dsc_mst_fairness_vars *vars) @@ -7588,14 +7963,22 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, dm_conn_state->vcpi_slots = slot_num; ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->mst_output_port, - dm_conn_state->pbn, false); + dm_conn_state->pbn, +#ifdef HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC_WITH_5_ARGS + 0, +#endif + false); if (ret < 0) return ret; continue; } - vcpi = drm_dp_mst_atomic_enable_dsc(state, aconnector->mst_output_port, pbn, true); + vcpi = drm_dp_mst_atomic_enable_dsc(state, aconnector->mst_output_port, pbn, +#ifdef HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC_WITH_5_ARGS + 0, +#endif + true); if (vcpi < 0) return vcpi; @@ -7604,6 +7987,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } return 0; } +#endif static int to_drm_connector_type(enum signal_type st) { @@ -7630,17 +8014,6 @@ static int to_drm_connector_type(enum signal_type st) } } -static struct drm_encoder *amdgpu_dm_connector_to_encoder(struct drm_connector *connector) -{ - struct drm_encoder *encoder; - - /* There is only one encoder per connector */ - drm_connector_for_each_possible_encoder(connector, encoder) - return encoder; - - return NULL; -} - static void amdgpu_dm_get_native_mode(struct drm_connector *connector) { struct drm_encoder *encoder; @@ -7688,6 +8061,7 @@ amdgpu_dm_create_common_mode(struct drm_encoder *encoder, mode->hdisplay = hdisplay; mode->vdisplay = vdisplay; mode->type &= ~DRM_MODE_TYPE_PREFERRED; + strscpy(mode->name, name, DRM_DISPLAY_MODE_LEN); return mode; @@ -7880,6 +8254,10 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector) num = (unsigned long long)m->clock * 1000 * 1000; den = common_rates[i] * (unsigned long long)m->htotal; target_vtotal = div_u64(num, den); + + if (target_vtotal < m->vtotal) + continue; + target_vtotal_diff = target_vtotal - m->vtotal; /* Check for illegal modes */ @@ -8049,13 +8427,18 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, if (connector_type == 
DRM_MODE_CONNECTOR_HDMIA || connector_type == DRM_MODE_CONNECTOR_DisplayPort || connector_type == DRM_MODE_CONNECTOR_eDP) { + drm_connector_attach_hdr_output_metadata_property(&aconnector->base); if (!aconnector->mst_root) drm_connector_attach_vrr_capable_property(&aconnector->base); if (adev->dm.hdcp_workqueue) +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE drm_connector_attach_content_protection_property(&aconnector->base, true); +#else + drm_connector_attach_content_protection_property(&aconnector->base); +#endif } } @@ -8248,12 +8631,42 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, - bool enable) -{ - if (enable) + struct dm_crtc_state *acrtc_state) +{ +#ifdef HAVE_DRM_VBLANK_CRTC_CONFIG + struct drm_vblank_crtc_config config = {0}; + struct dc_crtc_timing *timing; + int offdelay; + + if (acrtc_state) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0) || + acrtc_state->stream->link->psr_settings.psr_version < + DC_PSR_VERSION_UNSUPPORTED) { + timing = &acrtc_state->stream->timing; + + /* at least 2 frames */ + offdelay = DIV64_U64_ROUND_UP((u64)20 * + timing->v_total * + timing->h_total, + timing->pix_clk_100hz); + + config.offdelay_ms = offdelay ?: 30; + } else { + config.disable_immediate = true; + } + + drm_crtc_vblank_on_config(&acrtc->base, + &config); + } else { + drm_crtc_vblank_off(&acrtc->base); + } +#else + if (acrtc_state) drm_crtc_vblank_on(&acrtc->base); else drm_crtc_vblank_off(&acrtc->base); +#endif } static void dm_update_pflip_irq_state(struct amdgpu_device *adev, @@ -8318,6 +8731,7 @@ static bool is_content_protection_different(struct drm_crtc_state *new_crtc_stat new_crtc_state->active_changed, new_crtc_state->connectors_changed); +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE /* hdcp content type change */ if (old_conn_state->hdcp_content_type != new_conn_state->hdcp_content_type && new_conn_state->content_protection != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { @@ -8325,6 +8739,7 @@ static bool is_content_protection_different(struct drm_crtc_state *new_crtc_stat pr_debug("[HDCP_DM] Type0/1 change %s :true\n", __func__); return true; } +#endif /* CP is being re enabled, ignore this */ if (old_conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED && @@ -8845,7 +9260,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, afb->tmz_surface, false); drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", - new_plane_state->plane->index, + drm_plane_index(new_plane_state->plane), bundle->plane_infos[planes_count].dcc.enable); bundle->surface_updates[planes_count].plane_info = @@ -8889,7 +9304,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * dm_crtc_helper_atomic_check() only accepts async flips with * fast updates. 
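The offdelay computed in manage_dm_interrupts() above is "at least 2 frames" expressed in milliseconds: with the pixel clock in 100 Hz units, one frame lasts 10 * v_total * h_total / pix_clk_100hz ms, so the factor of 20 yields two frames. A worked example using nominal CEA 1080p60 timing (v_total 1125, h_total 2200, 148.5 MHz pixel clock); these timing values are illustrative assumptions, not taken from the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-in for the kernel's DIV64_U64_ROUND_UP(). */
    static uint64_t div64_u64_round_up(uint64_t n, uint64_t d)
    {
        return (n + d - 1) / d;
    }

    int main(void)
    {
        uint64_t v_total = 1125;          /* CEA-861 1080p60 */
        uint64_t h_total = 2200;
        uint64_t pix_clk_100hz = 1485000; /* 148.5 MHz in 100 Hz units */

        /* Two frame times in ms: 20 * v_total * h_total / pix_clk_100hz. */
        uint64_t offdelay = div64_u64_round_up(20 * v_total * h_total,
                                               pix_clk_100hz);

        /* One 1080p60 frame is ~16.7 ms, so this prints 34. */
        printf("vblank offdelay: %llu ms\n", (unsigned long long)offdelay);
        return 0;
    }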
*/ +#if defined(HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP) if (crtc->state->async_flip && +#else + if ((crtc->state->pageflip_flags & + DRM_MODE_PAGE_FLIP_ASYNC) != 0 && +#endif (acrtc_state->update_type != UPDATE_TYPE_FAST || get_mem_type(old_plane_state->fb) != get_mem_type(fb))) drm_warn_once(state->dev, @@ -8897,7 +9317,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, plane->base.id, plane->name); bundle->flip_addrs[planes_count].flip_immediate = +#if defined(HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP) crtc->state->async_flip && +#else + (crtc->state->pageflip_flags & + DRM_MODE_PAGE_FLIP_ASYNC) != 0 && +#endif acrtc_state->update_type == UPDATE_TYPE_FAST && get_mem_type(old_plane_state->fb) == get_mem_type(fb); @@ -9297,7 +9722,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { + new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -9305,7 +9730,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (old_crtc_state->active && (!new_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state))) { - manage_dm_interrupts(adev, acrtc, false); + manage_dm_interrupts(adev, acrtc, NULL); dc_stream_release(dm_old_crtc_state->stream); } } @@ -9567,7 +9992,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) trace_amdgpu_dm_atomic_commit_tail_begin(state); drm_atomic_helper_update_legacy_modeset_state(dev, state); +#ifdef HAVE_DRM_DP_ATOMIC_WAIT_FOR_DEPENDENCIES drm_dp_mst_atomic_wait_for_dependencies(state); +#endif dm_state = dm_atomic_get_new_state(state); if (dm_state && dm_state->context) { @@ -9673,6 +10100,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (new_con_state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE if (aconnector->dc_link && aconnector->dc_sink && aconnector->dc_link->type == dc_connection_mst_branch) { struct hdcp_workqueue *hdcp_work = adev->dm.hdcp_workqueue; @@ -9684,6 +10112,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) hdcp_w->content_protection[connector->index] = new_con_state->content_protection; } +#endif if (new_crtc_state && new_crtc_state->mode_changed && new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) @@ -9694,7 +10123,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (aconnector->dc_link) hdcp_update_display( adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, - new_con_state->hdcp_content_type, enable_encryption); +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE + new_con_state->hdcp_content_type, +#else + DRM_MODE_HDCP_CONTENT_TYPE0, +#endif + enable_encryption); } } @@ -9705,9 +10139,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); struct dc_surface_update *dummy_updates; struct dc_stream_update stream_update; - struct dc_info_packet hdr_packet; struct dc_stream_status *status = NULL; - bool abm_changed, hdr_changed, scaling_changed; +#ifdef HDMI_DRM_INFOFRAME_SIZE + struct dc_info_packet hdr_packet; + bool hdr_changed; +#endif + bool abm_changed, scaling_changed; memset(&stream_update, 0, sizeof(stream_update)); @@ -9729,10 +10166,16 @@ static void 
amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) abm_changed = dm_new_crtc_state->abm_level != dm_old_crtc_state->abm_level; +#ifdef HDMI_DRM_INFOFRAME_SIZE hdr_changed = !drm_connector_atomic_hdr_metadata_equal(old_con_state, new_con_state); +#endif - if (!scaling_changed && !abm_changed && !hdr_changed) + if (!scaling_changed && !abm_changed +#ifdef HDMI_DRM_INFOFRAME_SIZE + && !hdr_changed +#endif + ) continue; stream_update.stream = dm_new_crtc_state->stream; @@ -9750,10 +10193,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) stream_update.abm_level = &dm_new_crtc_state->abm_level; } +#ifdef HDMI_DRM_INFOFRAME_SIZE if (hdr_changed) { fill_hdr_info_packet(new_con_state, &hdr_packet); stream_update.hdr_static_metadata = &hdr_packet; } +#endif status = dc_stream_get_status(dm_new_crtc_state->stream); @@ -9821,7 +10266,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_crtc_needs_modeset(new_crtc_state))) { dc_stream_retain(dm_new_crtc_state->stream); acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; - manage_dm_interrupts(adev, acrtc, true); + manage_dm_interrupts(adev, acrtc, dm_new_crtc_state); } /* Handle vrr on->off / off->on transitions */ amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state); @@ -9857,7 +10302,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) +#if defined(HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP) if (new_crtc_state->async_flip) +#else + if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) +#endif wait_for_vblank = false; /* update planes when needed per crtc */ @@ -10265,10 +10714,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; +#ifdef HDMI_DRM_INFOFRAME_SIZE ret = fill_hdr_info_packet(drm_new_conn_state, &new_stream->hdr_static_metadata); if (ret) goto fail; +#endif /* * If we already removed the old stream from the context @@ -10519,7 +10970,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: We can likely skip bandwidth validation if the only thing that * changed about the plane was its z-ordering. 
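The should_reset_plane() hunk below swaps the CRTC-wide zpos_changed flag for a per-plane comparison of normalized_zpos, so only a plane whose own stacking position actually moved forces revalidation. A toy illustration of the difference; the struct is an invented stand-in, not the DRM plane state:

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-in for the relevant bit of DRM plane state. */
    struct toy_plane_state {
        int normalized_zpos;
    };

    /* Per-plane check: reset only the plane whose stacking moved. */
    static bool plane_zpos_moved(const struct toy_plane_state *old_state,
                                 const struct toy_plane_state *new_state)
    {
        return old_state->normalized_zpos != new_state->normalized_zpos;
    }

    int main(void)
    {
        struct toy_plane_state old_a = { 0 }, new_a = { 0 }; /* untouched */
        struct toy_plane_state old_b = { 1 }, new_b = { 2 }; /* restacked */

        /*
         * A CRTC-wide zpos_changed flag would mark both planes dirty;
         * the per-plane comparison leaves plane A alone.
         */
        printf("reset plane A: %d\n", plane_zpos_moved(&old_a, &new_a));
        printf("reset plane B: %d\n", plane_zpos_moved(&old_b, &new_b));
        return 0;
    }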
*/ - if (new_crtc_state->zpos_changed) + if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos) return true; if (drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -10679,7 +11130,6 @@ static int dm_check_native_cursor_state(struct drm_crtc *new_plane_crtc, struct drm_plane_state *new_plane_state, bool enable) { - struct amdgpu_crtc *new_acrtc; int ret; @@ -10742,7 +11192,6 @@ static int dm_update_plane_state(struct dc *dc, bool *lock_and_validation_needed, bool *is_top_most_overlay) { - struct dm_atomic_state *dm_state = NULL; struct drm_crtc *new_plane_crtc, *old_plane_crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; @@ -10992,6 +11441,7 @@ static inline struct __drm_planes_state *__get_next_zpos( (old_plane_state) = __i->old_state, \ (new_plane_state) = __i->new_state, 1)) +#if defined(HAVE_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS) static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm_crtc *crtc) { struct drm_connector *connector; @@ -11021,6 +11471,7 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm return drm_dp_mst_add_affected_dsc_crtcs(state, &aconnector->mst_root->mst_mgr); } +#endif /** * DOC: Cursor Modes - Native vs Overlay @@ -11230,7 +11681,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_dp_mst_topology_mgr *mgr; struct drm_dp_mst_topology_state *mst_state; struct dsc_mst_fairness_vars vars[MAX_PIPES] = {0}; - trace_amdgpu_dm_atomic_check_begin(state); ret = drm_atomic_helper_check_modeset(dev, state); @@ -11260,6 +11710,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, new_crtc_state->connectors_changed = true; } +#if defined(HAVE_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS) if (dc_resource_is_dsc_encoding_supported(dc)) { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (drm_atomic_crtc_needs_modeset(new_crtc_state)) { @@ -11271,9 +11722,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } } +#endif + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && !new_crtc_state->color_mgmt_changed && old_crtc_state->vrr_enabled == new_crtc_state->vrr_enabled && @@ -11367,6 +11819,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, drm_dbg(dev, "Failed to determine cursor mode\n"); goto fail; } + + /* + * If overlay cursor is needed, DC cannot go through the + * native cursor update path. 
All enabled planes on the CRTC + * need to be added for DC to not disable a plane by mistake + */ + if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto fail; + } } /* Remove exiting planes if they are modified */ @@ -11428,12 +11891,14 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } +#if defined(HAVE_DRM_DP_MST_ATOMIC_CHECK) #if defined(CONFIG_DRM_AMD_DC_FP) if (dc_resource_is_dsc_encoding_supported(dc)) { ret = pre_validate_dsc(state, &dm_state, vars); if (ret != 0) goto fail; } +#endif #endif /* Run this here since we want to validate the streams we created */ @@ -11496,6 +11961,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } } +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) && defined(HAVE_DRM_DP_MST_ATOMIC_CHECK) + /* Perform validation of MST topology in the state*/ + ret = drm_dp_mst_atomic_check(state); + if (ret) + goto fail; +#endif + if (state->legacy_cursor_update) { /* * This is a fast cursor update coming from the plane update @@ -11538,6 +12010,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, lock_and_validation_needed = true; } +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_TOTAL_AVAIL_SLOTS /* set the slot info for each mst_state based on the link encoding format */ for_each_new_mst_mgr_in_state(state, mgr, mst_state, i) { struct amdgpu_dm_connector *aconnector; @@ -11545,6 +12018,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_connector_list_iter iter; u8 link_coding_cap; +#if !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT) && !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION) + if (!mgr->mst_state ) + continue; +#endif drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { if (connector->index == mst_state->mgr->conn_base_id) { @@ -11557,7 +12034,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } drm_connector_list_iter_end(&iter); } - +#endif /** * Streams and planes are reset when there are changes that affect * bandwidth. Anything that affects bandwidth needs to go through @@ -11584,22 +12061,26 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } +#ifdef HAVE_DRM_DP_MST_ATOMIC_CHECK #if defined(CONFIG_DRM_AMD_DC_FP) if (dc_resource_is_dsc_encoding_supported(dc)) { ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); if (ret) { - drm_dbg_atomic(dev, "compute_mst_dsc_configs_for_state() failed\n"); + drm_dbg_atomic(dev, "MST_DSC compute_mst_dsc_configs_for_state() failed\n"); ret = -EINVAL; goto fail; } } #endif +#endif +#if defined(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC) ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); if (ret) { drm_dbg_atomic(dev, "dm_update_mst_vcpi_slots_for_dsc() failed\n"); goto fail; } +#endif /* * Perform validation of MST topology in the state: @@ -11607,11 +12088,15 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * dc_validate_global_state(), or there is a chance * to get stuck in an infinite loop and hang eventually. 
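For context on the slot bookkeeping above: DP MST divides each link into 64 timeslots, but on 8b/10b links one slot carries the MTP header, leaving 63 for payloads, while 128b/132b links can use all 64. A small sketch of that policy; the enum values, struct, and helper are illustrative stand-ins (the kernel encodes this in drm_dp_mst_update_slots()), not copied from the driver:

    #include <stdio.h>

    /* Illustrative stand-ins for the DP link coding capabilities. */
    enum link_coding_cap {
        CAP_ANSI_8B10B,
        CAP_ANSI_128B132B,
    };

    struct toy_mst_state {
        int total_avail_slots;
        int start_slot;
    };

    /* Mirrors the slot policy described above. */
    static void update_slots(struct toy_mst_state *s, enum link_coding_cap cap)
    {
        if (cap == CAP_ANSI_128B132B) {
            s->total_avail_slots = 64; /* no MTP header slot */
            s->start_slot = 0;
        } else {
            s->total_avail_slots = 63; /* slot 0 carries the MTP header */
            s->start_slot = 1;
        }
    }

    int main(void)
    {
        struct toy_mst_state s;

        update_slots(&s, CAP_ANSI_8B10B);
        printf("8b/10b:    %d slots from slot %d\n",
               s.total_avail_slots, s.start_slot);
        update_slots(&s, CAP_ANSI_128B132B);
        printf("128b/132b: %d slots from slot %d\n",
               s.total_avail_slots, s.start_slot);
        return 0;
    }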
*/ +#ifdef HAVE_DRM_DP_MST_ATOMIC_CHECK +#if defined(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC) ret = drm_dp_mst_atomic_check(state); if (ret) { - drm_dbg_atomic(dev, "drm_dp_mst_atomic_check() failed\n"); + drm_dbg_atomic(dev, "MST drm_dp_mst_atomic_check() failed\n"); goto fail; } +#endif +#endif status = dc_validate_global_state(dc, dm_state->context, true); if (status != DC_OK) { drm_dbg_atomic(dev, "DC global validation failure: %s (%d)", @@ -11671,7 +12156,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * Only allow async flips for fast updates that don't change * the FB pitch, the DCC state, rotation, etc. */ - if (new_crtc_state->async_flip && lock_and_validation_needed) { +#if defined(HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP) + if (new_crtc_state->async_flip && +#else + if ((new_crtc_state->pageflip_flags & + DRM_MODE_PAGE_FLIP_ASYNC) != 0 && +#endif + lock_and_validation_needed) { drm_dbg_atomic(crtc->dev, "[CRTC:%d:%s] async flips are only supported for fast updates\n", crtc->base.id, crtc->name); @@ -11833,6 +12324,7 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, return ret; } +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE static void parse_edid_displayid_vrr(struct drm_connector *connector, struct edid *edid) { @@ -11875,6 +12367,7 @@ static void parse_edid_displayid_vrr(struct drm_connector *connector, j++; } } +#endif static int parse_amd_vsdb(struct amdgpu_dm_connector *aconnector, struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) @@ -11984,8 +12477,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, amdgpu_dm_connector->min_vfreq = 0; amdgpu_dm_connector->max_vfreq = 0; +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE connector->display_info.monitor_range.min_vfreq = 0; connector->display_info.monitor_range.max_vfreq = 0; +#endif freesync_capable = false; goto update; @@ -11996,10 +12491,12 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module) goto update; +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE /* Some eDP panels only have the refresh rate range info in DisplayID */ if ((connector->display_info.monitor_range.min_vfreq == 0 || connector->display_info.monitor_range.max_vfreq == 0)) parse_edid_displayid_vrr(connector, edid); +#endif if (edid && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP)) { @@ -12008,11 +12505,13 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (amdgpu_dm_connector->dc_link && amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) freesync_capable = true; +#endif } else { edid_check_required = edid->version > 1 || (edid->version == 1 && @@ -12040,9 +12539,12 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (range->flags != 1) continue; + +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE connector->display_info.monitor_range.min_vfreq = range->min_vfreq; connector->display_info.monitor_range.max_vfreq = range->max_vfreq; + if (edid->revision >= 4) { if (data->pad2 & DRM_EDID_RANGE_OFFSET_MIN_VFREQ) connector->display_info.monitor_range.min_vfreq += 255; @@ -12054,6 +12556,10 @@ void 
amdgpu_dm_update_freesync_caps(struct drm_connector *connector, connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; +#else + amdgpu_dm_connector->min_vfreq = range->min_vfreq; + amdgpu_dm_connector->max_vfreq = range->max_vfreq; +#endif break; } @@ -12075,16 +12581,15 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } else if (edid && sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); if (i >= 0 && vsdb_info.freesync_supported) { - timing = &edid->detailed_timings[i]; - data = &timing->data.other_data; - amdgpu_dm_connector->min_vfreq = vsdb_info.min_refresh_rate_hz; amdgpu_dm_connector->max_vfreq = vsdb_info.max_refresh_rate_hz; if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) freesync_capable = true; +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; +#endif } } @@ -12104,8 +12609,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) freesync_capable = true; +#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE connector->display_info.monitor_range.min_vfreq = vsdb_info.min_refresh_rate_hz; connector->display_info.monitor_range.max_vfreq = vsdb_info.max_refresh_rate_hz; +#endif } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h old mode 100644 new mode 100755 index 2d7755e2b6c32..45f63822e7847 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,7 +50,7 @@ #define AMDGPU_DM_MAX_NUM_EDP 2 -#define AMDGPU_DMUB_NOTIFICATION_MAX 6 +#define AMDGPU_DMUB_NOTIFICATION_MAX 7 #define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A #define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40 @@ -157,6 +157,7 @@ struct idle_workqueue { * Describe the backlight support for ACPI or eDP AUX. */ struct amdgpu_dm_backlight_caps { +#ifdef HAVE_HDR_SINK_METADATA /** * @ext_caps: Keep the data struct with all the information about the * display support for HDR. @@ -171,6 +172,7 @@ struct amdgpu_dm_backlight_caps { * in nits. */ u32 aux_max_input_signal; +#endif /** * @min_input_signal: minimum possible input in range 0-255. */ @@ -186,7 +188,9 @@ struct amdgpu_dm_backlight_caps { /** * @aux_support: Describes if the display supports AUX backlight. */ +#ifdef HAVE_HDR_SINK_METADATA bool aux_support; +#endif /** * @ac_level: the default brightness if booted on AC */ @@ -418,6 +422,13 @@ struct amdgpu_display_manager { */ bool audio_registered; + /** + * @vblank_lock: + * + * Guards access to deferred vblank work state. 
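A compact restatement of the FreeSync gate used throughout amdgpu_dm_update_freesync_caps() above: the connector is only marked freesync_capable when the parsed refresh range spans more than 10 Hz. Plain C with a hypothetical helper name:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Hypothetical helper restating the check above: a usable VRR
     * window must span more than 10 Hz.
     */
    static bool vrr_window_usable(unsigned int min_vfreq,
                                  unsigned int max_vfreq)
    {
        return max_vfreq > min_vfreq && (max_vfreq - min_vfreq) > 10;
    }

    int main(void)
    {
        printf("%d\n", vrr_window_usable(48, 144)); /* 96 Hz span: usable */
        printf("%d\n", vrr_window_usable(50, 60));  /* 10 Hz span: not usable */
        return 0;
    }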
+ */ + spinlock_t vblank_lock; + /** * @irq_handler_list_low_tab: * @@ -916,8 +927,10 @@ struct dm_connector_state { bool freesync_capable; bool update_hdcp; uint8_t abm_level; +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) int vcpi_slots; uint64_t pbn; +#endif }; #define to_dm_connector_state(x)\ @@ -1004,6 +1017,9 @@ void *dm_allocate_gpu_mem(struct amdgpu_device *adev, enum dc_gpu_mem_alloc_type type, size_t size, long long *addr); +void dm_free_gpu_mem(struct amdgpu_device *adev, + enum dc_gpu_mem_alloc_type type, + void *addr); bool amdgpu_dm_is_headless(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 748e80ef40d0a..1682659bc8036 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -79,6 +79,7 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); + int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, size_t *values_cnt); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index a2cf2c066a76d..ce3bb6eee157e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -154,6 +154,7 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( amdgpu_dm_psr_enable(vblank_work->stream); if (dm->idle_workqueue && + (dm->dc->config.disable_ips == DMUB_IPS_ENABLE) && dm->dc->idle_optimizations_allowed && dm->idle_workqueue->enable && !dm->idle_workqueue->running) @@ -542,6 +543,9 @@ amdgpu_dm_atomic_crtc_get_property(struct drm_crtc *crtc, static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .reset = amdgpu_dm_crtc_reset_state, .destroy = amdgpu_dm_crtc_destroy, +#ifndef HAVE_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL + .gamma_set = drm_atomic_helper_legacy_gamma_set, +#endif .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = amdgpu_dm_crtc_duplicate_state, @@ -549,10 +553,12 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .set_crc_source = amdgpu_dm_crtc_set_crc_source, .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_dm_crtc_enable_vblank, .disable_vblank = amdgpu_dm_crtc_disable_vblank, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP + .get_vblank_counter = amdgpu_get_vblank_counter_kms, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#endif #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif @@ -621,10 +627,15 @@ static bool amdgpu_dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, } static int amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc, +#ifdef HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); +#else + struct drm_crtc_state *crtc_state) +{ +#endif struct amdgpu_device *adev = drm_to_adev(crtc->dev); struct dc *dc = adev->dm.dc; struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -651,18 +662,6 @@ static int 
amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc, return -EINVAL; } - /* - * Only allow async flips for fast updates that don't change the FB - * pitch, the DCC state, rotation, etc. - */ - if (crtc_state->async_flip && - dm_crtc_state->update_type != UPDATE_TYPE_FAST) { - drm_dbg_atomic(crtc->dev, - "[CRTC:%d:%s] async flips are only supported for fast updates\n", - crtc->base.id, crtc->name); - return -EINVAL; - } - /* In some use cases, like reset, no stream is attached */ if (!dm_crtc_state->stream) return 0; @@ -678,7 +677,9 @@ static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { .disable = amdgpu_dm_crtc_helper_disable, .atomic_check = amdgpu_dm_crtc_helper_atomic_check, .mode_fixup = amdgpu_dm_crtc_helper_mode_fixup, +#ifdef HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP .get_scanout_position = amdgpu_crtc_get_scanout_position, +#endif }; int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index db56b0aa54545..a71e0cd90cd6f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -25,7 +25,7 @@ #include #include - +#include #include "dc.h" #include "amdgpu.h" #include "amdgpu_dm.h" @@ -338,6 +338,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, return size; } +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE static bool dp_mst_is_end_device(struct amdgpu_dm_connector *aconnector) { bool is_end_device = false; @@ -484,7 +485,7 @@ static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf, kfree(wr_buf); return size; } - +#endif /* function: get current DP PHY settings: voltage swing, pre-emphasis, * post-cursor2 (defined by VESA DP specification) * @@ -568,6 +569,7 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, return result; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE static int dp_lttpr_status_show(struct seq_file *m, void *unused) { struct drm_connector *connector = m->private; @@ -602,6 +604,7 @@ static int dp_lttpr_status_show(struct seq_file *m, void *unused) seq_puts(m, "\n"); return 0; } +#endif static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -1529,7 +1532,6 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -1543,8 +1545,6 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -1558,10 +1558,9 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_clock_en); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -1719,7 +1718,6 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state 
= {0}; @@ -1733,8 +1731,6 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -1748,10 +1744,9 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_slice_width); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -1907,7 +1902,6 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -1921,8 +1915,6 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -1936,10 +1928,9 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_slice_height); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2091,7 +2082,6 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -2105,8 +2095,6 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -2120,10 +2108,9 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_bits_per_pixel); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2270,7 +2257,6 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -2284,8 +2270,6 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -2299,10 +2283,9 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_pic_width); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2328,7 +2311,6 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = 
file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -2342,8 +2324,6 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -2357,10 +2337,9 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_pic_height); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2401,7 +2380,6 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -2415,8 +2393,6 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -2430,10 +2406,9 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_chunk_size); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2474,7 +2449,6 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; - char *rd_buf_ptr = NULL; struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private; struct display_stream_compressor *dsc; struct dcn_dsc_state dsc_state = {0}; @@ -2488,8 +2462,6 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, if (!rd_buf) return -ENOMEM; - rd_buf_ptr = rd_buf; - for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && @@ -2503,10 +2475,9 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, if (dsc) dsc->funcs->dsc_read_state(dsc, &dsc_state); - snprintf(rd_buf_ptr, str_len, + snprintf(rd_buf, str_len, "%d\n", dsc_state.dsc_slice_bpg_offset); - rd_buf_ptr += str_len; while (size) { if (*pos >= rd_buf_size) @@ -2528,7 +2499,6 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, return result; } - /* * function description: Read max_requested_bpc property from the connector * @@ -2765,6 +2735,7 @@ static int target_backlight_show(struct seq_file *m, void *unused) * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector * */ +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE static int dp_is_mst_connector_show(struct seq_file *m, void *unused) { struct drm_connector *connector = m->private; @@ -2801,6 +2772,7 @@ static int dp_is_mst_connector_show(struct seq_file *m, void *unused) return 0; } +#endif /* * function description: Read out the mst progress status @@ -2860,13 +2832,17 @@ static int is_dpia_link_show(struct seq_file *m, void *data) DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SHOW_ATTRIBUTE(dp_lttpr_status); +#endif DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); 
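The dp_dsc_*_read() cleanups in the surrounding hunks all have the same shape: rd_buf_ptr never advanced past a single snprintf(), so each handler can format straight into rd_buf. A userspace sketch of the resulting read path, with invented names and a fixed value standing in for the DSC register state:

#include <stdio.h>
#include <stdlib.h>

/* Userspace analogue of the simplified read path: format one integer
 * into rd_buf, then copy out at most `size` bytes starting at *pos,
 * like the while (size) loop the diff keeps. */
static long dsc_state_read(char *dst, size_t size, long *pos,
			   int value, size_t rd_buf_size)
{
	char *rd_buf = calloc(rd_buf_size, 1);
	long result = 0;

	if (!rd_buf)
		return -1;

	snprintf(rd_buf, rd_buf_size, "%d\n", value);

	while (size) {
		if (*pos >= (long)rd_buf_size)
			break;
		dst[result++] = rd_buf[*pos]; /* put_user() in the kernel */
		size--;
		(*pos)++;
	}

	free(rd_buf);
	return result;
}

int main(void)
{
	char out[16] = { 0 };
	long pos = 0;
	long n = dsc_state_read(out, sizeof(out) - 1, &pos, 1, 10);

	printf("read %ld bytes: %s", n, out); /* "read 10 bytes: 1" */
	return 0;
}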
DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(odm_combine_segments); DEFINE_SHOW_ATTRIBUTE(replay_capability); DEFINE_SHOW_ATTRIBUTE(psr_capability); +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); +#endif DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); DEFINE_SHOW_ATTRIBUTE(is_dpia_link); @@ -2966,12 +2942,13 @@ static const struct file_operations dp_dsc_disable_passthrough_debugfs_fops = { .write = dp_dsc_passthrough_set, .llseek = default_llseek }; - +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE static const struct file_operations dp_mst_link_settings_debugfs_fops = { .owner = THIS_MODULE, .write = dp_mst_link_setting, .llseek = default_llseek }; +#endif static const struct { char *name; @@ -2979,7 +2956,9 @@ static const struct { } dp_debugfs_entries[] = { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, +#ifdef DEFINE_DEBUGFS_ATTRIBUTE {"lttpr_status", &dp_lttpr_status_fops}, +#endif {"test_pattern", &dp_phy_test_pattern_fops}, {"hdcp_sink_capability", &hdcp_sink_capability_fops}, {"sdp_message", &sdp_message_fops}, @@ -2994,10 +2973,14 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE {"is_mst_connector", &dp_is_mst_connector_fops}, +#endif {"mst_progress_status", &dp_mst_progress_status_fops}, {"is_dpia_link", &is_dpia_link_fops}, +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE {"mst_link_settings", &dp_mst_link_settings_debugfs_fops} +#endif }; static const struct { @@ -3031,8 +3014,10 @@ static int force_yuv420_output_get(void *data, u64 *val) return 0; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(force_yuv420_output_fops, force_yuv420_output_get, force_yuv420_output_set, "%llu\n"); +#endif /* * Read Replay state @@ -3108,6 +3093,7 @@ static int allow_edp_hotplug_detection_set(void *data, u64 val) return 0; } +#if defined(DEFINE_DEBUGFS_ATTRIBUTE) /* check if kernel disallow eDP enter psr state * cat /sys/kernel/debug/dri/0/eDP-X/disallow_edp_enter_psr * 0: allow edp enter psr; 1: disallow @@ -3233,7 +3219,7 @@ static int dmub_trace_mask_show(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(dmub_trace_mask_fops, dmub_trace_mask_show, dmub_trace_mask_set, "0x%llx\n"); - +#endif /* * Set dmcub trace event IRQ enable or disable. 
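Several hunks here and below compile debugfs attributes out behind `#ifdef DEFINE_DEBUGFS_ATTRIBUTE`. That works because DEFINE_DEBUGFS_ATTRIBUTE() is a function-like macro (from linux/debugfs.h on kernels that provide it), and #ifdef only asks whether the name is defined at that point in the translation unit. A minimal self-contained illustration, using an invented macro in place of the kernel one:

#include <stdio.h>

/* Invented stand-in for DEFINE_DEBUGFS_ATTRIBUTE(): a function-like
 * macro whose mere definition can be probed with #ifdef. */
#define DEFINE_GREETING_ATTRIBUTE(name, text) \
	static void name(void) { puts(text); }

#ifdef DEFINE_GREETING_ATTRIBUTE
DEFINE_GREETING_ATTRIBUTE(hello_op, "hello")
#endif

int main(void)
{
#ifdef DEFINE_GREETING_ATTRIBUTE
	hello_op();                     /* the "new kernel" path */
#else
	puts("attribute compiled out"); /* the "old kernel" fallback */
#endif
	return 0;
}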
* Usage to enable dmcub trace event IRQ: echo 1 > /sys/kernel/debug/dri/0/amdgpu_dm_dmcub_trace_event_en @@ -3265,6 +3251,7 @@ static int dmcub_trace_event_state_get(void *data, u64 *val) return 0; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_get, dmcub_trace_event_state_set, "%llu\n"); @@ -3281,6 +3268,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(allow_edp_hotplug_detection_fops, DEFINE_DEBUGFS_ATTRIBUTE(disallow_edp_enter_psr_fops, disallow_edp_enter_psr_get, disallow_edp_enter_psr_set, "%llu\n"); +#endif DEFINE_SHOW_ATTRIBUTE(current_backlight); DEFINE_SHOW_ATTRIBUTE(target_backlight); @@ -3290,7 +3278,9 @@ static const struct { char *name; const struct file_operations *fops; } connector_debugfs_entries[] = { +#ifdef DEFINE_DEBUGFS_ATTRIBUTE {"force_yuv420_output", &force_yuv420_output_fops}, +#endif {"trigger_hotplug", &trigger_hotplug_debugfs_fops}, {"internal_display", &internal_display_fops}, {"odm_combine_segments", &odm_combine_segments_fops} @@ -3443,6 +3433,8 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) dp_debugfs_entries[i].fops); } } + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE if (connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { debugfs_create_file("replay_capability", 0444, dir, connector, &replay_capability_fops); @@ -3462,6 +3454,7 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) debugfs_create_file("disallow_edp_enter_psr", 0644, dir, connector, &disallow_edp_enter_psr_fops); } +#endif for (i = 0; i < ARRAY_SIZE(connector_debugfs_entries); i++) { debugfs_create_file(connector_debugfs_entries[i].name, @@ -3780,6 +3773,7 @@ static int mst_topo_show(struct seq_file *m, void *unused) return 0; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE /* * Sets trigger hpd for MST topologies. * All connected connectors will be rediscovered and re started as needed if val of 1 is sent. @@ -3853,7 +3847,7 @@ static int trigger_hpd_mst_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(trigger_hpd_mst_ops, trigger_hpd_mst_get, trigger_hpd_mst_set, "%llu\n"); - +#endif /* * Sets the force_timing_sync debug option from the given string. @@ -3884,10 +3878,13 @@ static int force_timing_sync_get(void *data, u64 *val) return 0; } +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(force_timing_sync_ops, force_timing_sync_get, force_timing_sync_set, "%llu\n"); +#endif +#ifdef DEFINE_DEBUGFS_ATTRIBUTE /* * Disables all HPD and HPD RX interrupt handling in the * driver when set to 1. Default is 0. @@ -3917,6 +3914,7 @@ static int disable_hpd_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(disable_hpd_ops, disable_hpd_get, disable_hpd_set, "%llu\n"); +#endif /* * Prints hardware capabilities. These are used for IGT testing. @@ -3969,6 +3967,8 @@ static int dp_force_sst_get(void *data, u64 *val) return 0; } + +#if defined(DEFINE_DEBUGFS_ATTRIBUTE) DEFINE_DEBUGFS_ATTRIBUTE(dp_set_mst_en_for_sst_ops, dp_force_sst_get, dp_force_sst_set, "%llu\n"); @@ -3995,6 +3995,7 @@ static int dp_ignore_cable_id_get(void *data, u64 *val) } DEFINE_DEBUGFS_ATTRIBUTE(dp_ignore_cable_id_ops, dp_ignore_cable_id_get, dp_ignore_cable_id_set, "%llu\n"); +#endif /* * Sets the DC visual confirm debug option from the given string. 
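Each knob in these hunks (visual_confirm, force_timing_sync, disable_hpd, dp_set_mst_en_for_sst, and so on) follows the same contract: a get callback, a set callback, and a printf-style format string bound together by DEFINE_DEBUGFS_ATTRIBUTE(..., "%llu\n"). A userspace model of that contract; the struct, helper names, and backing variable are invented for the sketch:

#include <stdio.h>

/* Model of a debugfs attribute: (get, set) callbacks plus the format
 * string that DEFINE_DEBUGFS_ATTRIBUTE() binds to them. */
struct knob {
	int (*get)(void *data, unsigned long long *val);
	int (*set)(void *data, unsigned long long val);
	const char *fmt;
};

static unsigned long long visual_confirm_state;

static int vc_get(void *data, unsigned long long *val)
{
	*val = *(unsigned long long *)data;
	return 0;
}

static int vc_set(void *data, unsigned long long val)
{
	*(unsigned long long *)data = val;
	return 0;
}

static const struct knob visual_confirm = { vc_get, vc_set, "%llu\n" };

int main(void)
{
	unsigned long long v;

	visual_confirm.set(&visual_confirm_state, 3);  /* echo 3 > .../knob */
	visual_confirm.get(&visual_confirm_state, &v); /* cat .../knob */
	printf("%llu\n", v);                           /* the bound format */
	return 0;
}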
@@ -4023,6 +4024,7 @@ static int visual_confirm_get(void *data, u64 *val) } DEFINE_SHOW_ATTRIBUTE(mst_topo); +#ifdef DEFINE_DEBUGFS_ATTRIBUTE DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get, visual_confirm_set, "%llu\n"); @@ -4059,6 +4061,7 @@ static int skip_detection_link_training_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(skip_detection_link_training_fops, skip_detection_link_training_get, skip_detection_link_training_set, "%llu\n"); +#endif /* * Dumps the DCC_EN bit for each pipe. @@ -4145,11 +4148,12 @@ void dtn_debugfs_init(struct amdgpu_device *adev) adev, &capabilities_fops); debugfs_create_file("amdgpu_dm_dtn_log", 0644, root, adev, &dtn_log_fops); + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE debugfs_create_file("amdgpu_dm_dp_set_mst_en_for_sst", 0644, root, adev, &dp_set_mst_en_for_sst_ops); debugfs_create_file("amdgpu_dm_dp_ignore_cable_id", 0644, root, adev, &dp_ignore_cable_id_ops); - debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev, &visual_confirm_fops); @@ -4183,4 +4187,5 @@ void dtn_debugfs_init(struct amdgpu_device *adev) if (adev->dm.dc->caps.ips_support) debugfs_create_file_unsafe("amdgpu_dm_ips_status", 0644, root, adev, &ips_status_fops); +#endif } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541c..f0bc72614588e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -231,9 +231,11 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, if (conn_state && conn_state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) { conn_state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP 2 -> 1, type %u, DPMS %u\n", aconnector->base.index, conn_state->hdcp_content_type, aconnector->base.dpms); +#endif } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); @@ -336,6 +338,7 @@ static void event_property_update(struct work_struct *work) } if (hdcp_work->encryption_status[conn_index] != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE if (conn_state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 && hdcp_work->encryption_status[conn_index] <= @@ -350,6 +353,9 @@ static void event_property_update(struct work_struct *work) drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_ENABLED); } +#else + drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); +#endif } else { DRM_DEBUG_DRIVER("[HDCP_DM] DRM_MODE_CONTENT_PROTECTION_DESIRED\n"); drm_hdcp_update_content_protection(connector, @@ -550,6 +556,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->adjust.hdcp1.disable = 0; hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE DRM_DEBUG_DRIVER("[HDCP_DM] display %d, CP %d, type %d\n", aconnector->base.index, (!!aconnector->base.state) ? 
aconnector->base.state->content_protection : -1, @@ -562,7 +569,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) process_output(hdcp_w); mutex_unlock(&hdcp_w->mutex); - +#endif } /** diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 165e010fe69c8..8f58dbb28ee39 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -45,6 +45,41 @@ #include "dm_helpers.h" #include "ddc_service_types.h" +#ifndef HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP +struct monitor_patch_info { + unsigned int manufacturer_id; + unsigned int product_id; + void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param); + unsigned int patch_param; +}; +static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param); + +static const struct monitor_patch_info monitor_patch_table[] = { +{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15}, +{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15}, +}; + +static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param) +{ + if (edid_caps) + edid_caps->panel_patch.max_dsc_target_bpp_limit = param; +} + +static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps) +{ + int i, ret = 0; + + for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++) + if ((edid_caps->manufacturer_id == monitor_patch_table[i].manufacturer_id) + && (edid_caps->product_id == monitor_patch_table[i].product_id)) { + monitor_patch_table[i].patch_func(edid_caps, monitor_patch_table[i].patch_param); + ret++; + } + + return ret; +} +#endif + static u32 edid_extract_panel_id(struct edid *edid) { return (u32)edid->mfg_id[0] << 24 | @@ -73,6 +108,10 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + DRM_DEBUG_DRIVER("Disabling VSC on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.disable_colorimetry = true; + break; default: return; } @@ -121,7 +160,12 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->display_name, AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS); +#if defined(HAVE_DRM_DISPLAY_INFO_IS_HDMI) edid_caps->edid_hdmi = connector->display_info.is_hdmi; +#else + edid_caps->edid_hdmi = drm_detect_hdmi_monitor( + (struct edid *) edid->raw_edid); +#endif apply_edid_quirks(edid_buf, edid_caps); @@ -154,9 +198,14 @@ enum dc_edid_status dm_helpers_parse_edid_caps( kfree(sads); kfree(sadb); +#ifndef HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP + amdgpu_dm_patch_edid_caps(edid_caps); +#endif + return result; } +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) static void fill_dc_mst_payload_table_from_drm(struct dc_link *link, bool enable, @@ -207,12 +256,50 @@ fill_dc_mst_payload_table_from_drm(struct dc_link *link, /* Overwrite the old table */ *table = new_table; } +#else +static void +fill_dc_mst_payload_table_from_drm(struct amdgpu_dm_connector *aconnector, + struct dc_dp_mst_stream_allocation_table *proposed_table) +{ + int i; + struct drm_dp_mst_topology_mgr *mst_mgr = + &aconnector->mst_root->mst_mgr; + + mutex_lock(&mst_mgr->payload_lock); + + proposed_table->stream_count = 0; + + /* number of active streams */ + for (i = 0; i < mst_mgr->max_payloads; i++) { + if (mst_mgr->payloads[i].num_slots == 0) + break; /* end of vcp_id table */ + + 
ASSERT(mst_mgr->payloads[i].payload_state != + DP_PAYLOAD_DELETE_LOCAL); + + if (mst_mgr->payloads[i].payload_state == DP_PAYLOAD_LOCAL || + mst_mgr->payloads[i].payload_state == + DP_PAYLOAD_REMOTE) { + + struct dc_dp_mst_stream_allocation *sa = + &proposed_table->stream_allocations[ + proposed_table->stream_count]; + + sa->slot_count = mst_mgr->payloads[i].num_slots; + sa->vcp_id = mst_mgr->proposed_vcpis[i]->vcpi; + proposed_table->stream_count++; + } + } + + mutex_unlock(&mst_mgr->payload_lock); +} +#endif /*HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS*/ void dm_helpers_dp_update_branch_info( struct dc_context *ctx, const struct dc_link *link) {} - +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) static void dm_helpers_construct_old_payload( struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, @@ -220,7 +307,12 @@ static void dm_helpers_construct_old_payload( struct drm_dp_mst_atomic_payload *old_payload) { struct drm_dp_mst_atomic_payload *pos; +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION int pbn_per_slot = dfixed_trunc(mst_state->pbn_div); +#elif HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT + int pbn_per_slot = mst_state->pbn_div; +#endif + u8 next_payload_vc_start = mgr->next_start_slot; u8 payload_vc_start = new_payload->vc_start_slot; u8 allocated_time_slots; @@ -244,6 +336,7 @@ static void dm_helpers_construct_old_payload( old_payload->time_slots = allocated_time_slots; old_payload->pbn = allocated_time_slots * pbn_per_slot; } +#endif /* * Writes payload allocation table in immediate downstream device. @@ -255,9 +348,26 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( bool enable) { struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) struct drm_dp_mst_topology_state *mst_state; struct drm_dp_mst_atomic_payload *target_payload, *new_payload, old_payload; - struct drm_dp_mst_topology_mgr *mst_mgr; +#else +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + struct dm_connector_state *dm_conn_state; +#endif + struct drm_dp_mst_port *mst_port; +#if !defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + int slots = 0; +#endif + bool ret; +#if !defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + int clock; + int bpp = 0; + int pbn = 0; +#endif + u8 link_coding_cap = DP_8b_10b_ENCODING; +#endif /*HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS*/ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; /* Accessing the connector state is required for vcpi_slots allocation @@ -270,6 +380,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( return false; mst_mgr = &aconnector->mst_root->mst_mgr; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); @@ -283,8 +394,11 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( dm_helpers_construct_old_payload(mst_mgr, mst_state, new_payload, &old_payload); target_payload = &old_payload; - +#ifdef HAVE_DRM_DP_REMOVE_RAYLOAD_PART drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload); +#else + drm_dp_remove_payload(mst_mgr, mst_state, &old_payload, new_payload); +#endif } /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or @@ -293,7 +407,81 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( * sequence. 
copy DRM MST allocation to dc */ fill_dc_mst_payload_table_from_drm(stream->link, enable, target_payload, proposed_table); - +#else +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + dm_conn_state = to_dm_connector_state(aconnector->base.state); +#endif + if (!mst_mgr->mst_state) + return false; + + mst_port = aconnector->mst_output_port; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(aconnector->dc_link); + + if (enable) { + +#if !defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) + clock = stream->timing.pix_clk_100hz / 10; + + switch (stream->timing.display_color_depth) { + + case COLOR_DEPTH_666: + bpp = 6; + break; + case COLOR_DEPTH_888: + bpp = 8; + break; + case COLOR_DEPTH_101010: + bpp = 10; + break; + case COLOR_DEPTH_121212: + bpp = 12; + break; + case COLOR_DEPTH_141414: + bpp = 14; + break; + case COLOR_DEPTH_161616: + bpp = 16; + break; + default: + ASSERT(bpp != 0); + break; + } + + bpp = bpp * 3; + + /* TODO need to know link rate */ + pbn = drm_dp_calc_pbn_mode(clock, bpp, false); + + slots = drm_dp_find_vcpi_slots(mst_mgr, pbn); + ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, pbn, + slots); +#else + ret = drm_dp_mst_allocate_vcpi(mst_mgr, mst_port, + dm_conn_state->pbn, + dm_conn_state->vcpi_slots); +#endif + if (!ret) + return false; + + } else { + drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port); + } + + /* It's OK for this to fail */ +#ifdef HAVE_DRM_DP_UPDATE_PAYLOAD_PART1_START_SLOT_ARG + drm_dp_update_payload_part1(mst_mgr, (link_coding_cap == DP_CAP_ANSI_128B132B) ? 0:1); +#else + drm_dp_update_payload_part1(mst_mgr); +#endif + + /* mst_mgr->payloads are VC payload notify MST branch using DPCD or + * AUX message. The sequence is slot 1-63 allocated sequence for each + * stream. AMD ASIC stream slot allocation should follow the same + * sequence.
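On the legacy (pre-atomic-payload) path above, the driver derives PBN itself: pixel clock in kHz from pix_clk_100hz / 10, bits per pixel as three times the per-component depth, then drm_dp_calc_pbn_mode(). A back-of-envelope version of that math, mirroring what drm_dp_calc_pbn_mode() has historically computed for non-DSC streams (PBN in units of 54/64 MBytes/s plus a 0.6% margin); this is a sketch for illustration, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

/* peak rate in bytes/s is clock * bpp / 8; one PBN is 54/64 MB/s,
 * and a 1006/1000 factor adds the traditional 0.6% margin. */
static uint64_t calc_pbn(uint64_t clock_khz, uint64_t bpp)
{
	uint64_t num = clock_khz * bpp * 64 * 1006;
	uint64_t den = 8ULL * 54 * 1000 * 1000;

	return (num + den - 1) / den; /* round up */
}

int main(void)
{
	/* 3840x2160@60 (594 MHz pixel clock), COLOR_DEPTH_888 -> bpp = 8 * 3 */
	printf("PBN = %llu\n",
	       (unsigned long long)calc_pbn(594000, 24)); /* 2125 */
	return 0;
}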
copy DRM MST allocation to dc */ + + fill_dc_mst_payload_table_from_drm(aconnector, proposed_table); +#endif return true; } @@ -348,9 +536,13 @@ void dm_helpers_dp_mst_send_payload_allocation( const struct dc_stream_state *stream) { struct amdgpu_dm_connector *aconnector; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) struct drm_dp_mst_topology_state *mst_state; - struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_atomic_payload *new_payload; +#else + struct drm_dp_mst_port *mst_port; +#endif + struct drm_dp_mst_topology_mgr *mst_mgr; enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; int ret = 0; @@ -361,11 +553,21 @@ void dm_helpers_dp_mst_send_payload_allocation( return; mst_mgr = &aconnector->mst_root->mst_mgr; +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - ret = drm_dp_add_payload_part2(mst_mgr, new_payload); +#else + mst_port = aconnector->mst_output_port; + if (!mst_mgr->mst_state) + return; +#endif +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS) + ret = drm_dp_add_payload_part2(mst_mgr, new_payload); +#else + ret = drm_dp_update_payload_part2(mst_mgr); +#endif if (ret) { amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, false); @@ -382,9 +584,13 @@ void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( const struct dc_stream_state *stream) { struct amdgpu_dm_connector *aconnector; - struct drm_dp_mst_topology_state *mst_state; struct drm_dp_mst_topology_mgr *mst_mgr; +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS + struct drm_dp_mst_topology_state *mst_state; struct drm_dp_mst_atomic_payload *new_payload, old_payload; +#else + struct drm_dp_mst_port *mst_port; +#endif enum mst_progress_status set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; enum mst_progress_status clr_flag = MST_ALLOCATE_NEW_PAYLOAD; @@ -394,15 +600,26 @@ void dm_helpers_dp_mst_update_mst_mgr_for_deallocation( return; mst_mgr = &aconnector->mst_root->mst_mgr; +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); +#ifdef HAVE_DRM_DP_REMOVE_RAYLOAD_PART dm_helpers_construct_old_payload(mst_mgr, mst_state, new_payload, &old_payload); drm_dp_remove_payload_part2(mst_mgr, mst_state, &old_payload, new_payload); +#endif +#else + mst_port = aconnector->mst_output_port; + if (!mst_mgr->mst_state) + return; +#endif amdgpu_dm_set_mst_status(&aconnector->mst_status, set_flag, true); amdgpu_dm_set_mst_status(&aconnector->mst_status, clr_flag, false); +#ifndef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS + drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); +#endif } void dm_dtn_log_begin(struct dc_context *ctx, @@ -677,8 +894,11 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, // read rc data drm_dp_dpcd_read(aux, SYNAPTICS_RC_DATA, data, length); } - +#ifdef HAVE_DRM_DP_AUX_DRM_DEV drm_dbg_dp(aux->drm_dev, "success = %d\n", success); +#else + DRM_DEBUG_KMS("%s: success = %d\n", __func__, success); +#endif return success; } @@ -686,9 +906,11 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux) { unsigned char data[16] = {0}; - +#ifdef HAVE_DRM_DP_AUX_DRM_DEV drm_dbg_dp(aux->drm_dev, "Start\n"); - +#else + DRM_DEBUG_KMS("Start %s\n", 
__func__); +#endif // Step 2 data[0] = 'P'; data[1] = 'R'; @@ -744,8 +966,11 @@ static void apply_synaptics_fifo_reset_wa(struct drm_dp_aux *aux) // Step 6 if (!execute_synaptics_rc_command(aux, true, 0x02, 0, 0, NULL)) return; - +#ifdef HAVE_DRM_DP_AUX_DRM_DEV drm_dbg_dp(aux->drm_dev, "Done\n"); +#else + DRM_DEBUG_KMS("Done %s\n", __func__); +#endif } /* MST Dock */ @@ -757,9 +982,12 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( bool enable) { uint8_t ret = 0; - +#ifdef HAVE_DRM_DP_AUX_DRM_DEV drm_dbg_dp(aux->drm_dev, - "Configure DSC to non-virtual dpcd synaptics\n"); + "MST_DSC Configure DSC to non-virtual dpcd synaptics\n"); +#else + DRM_DEBUG_KMS("MST_DSC Configure DSC to non-virtual dpcd synaptics\n"); +#endif if (enable) { /* When DSC is enabled on previous boot and reboot with the hub, @@ -772,7 +1000,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( apply_synaptics_fifo_reset_wa(aux); ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1); - DRM_INFO("Send DSC enable to synaptics\n"); + DRM_INFO("MST_DSC Send DSC enable to synaptics\n"); } else { /* Synaptics hub not support virtual dpcd, @@ -781,7 +1009,7 @@ static uint8_t write_dsc_enable_synaptics_non_virtual_dpcd_mst( */ if (!stream->link->link_status.link_active) { ret = drm_dp_dpcd_write(aux, DP_DSC_ENABLE, &enable, 1); - DRM_INFO("Send DSC disable to synaptics\n"); + DRM_INFO("MST_DSC Send DSC disable to synaptics\n"); } } @@ -793,16 +1021,20 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { + struct amdgpu_dm_connector *aconnector = + (struct amdgpu_dm_connector *)stream->dm_stream_context; + struct drm_device *dev = aconnector->base.dev; +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) static const uint8_t DSC_DISABLE; static const uint8_t DSC_DECODING = 0x01; static const uint8_t DSC_PASSTHROUGH = 0x02; - struct amdgpu_dm_connector *aconnector = - (struct amdgpu_dm_connector *)stream->dm_stream_context; - struct drm_device *dev = aconnector->base.dev; struct drm_dp_mst_port *port; uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE; +#else + uint8_t enable_dsc = enable ? 1 : 0; +#endif uint8_t ret = 0; if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { @@ -815,6 +1047,7 @@ bool dm_helpers_dp_write_dsc_enable( return write_dsc_enable_synaptics_non_virtual_dpcd_mst( aconnector->dsc_aux, stream, enable_dsc); +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) port = aconnector->mst_output_port; if (enable) { @@ -823,14 +1056,14 @@ bool dm_helpers_dp_write_dsc_enable( DP_DSC_ENABLE, &enable_passthrough, 1); drm_dbg_dp(dev, - "Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", + "MST_DSC Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", ret); } ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Sent DSC decoding enable to %s port, ret = %u\n", + "MST_DSC Sent DSC decoding enable to %s port, ret = %u\n", (port->passthrough_aux) ? "remote RX" : "virtual dpcd", ret); @@ -838,7 +1071,7 @@ bool dm_helpers_dp_write_dsc_enable( ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Sent DSC decoding disable to %s port, ret = %u\n", + "MST_DSC Sent DSC decoding disable to %s port, ret = %u\n", (port->passthrough_aux) ? 
"remote RX" : "virtual dpcd", ret); @@ -848,22 +1081,27 @@ bool dm_helpers_dp_write_dsc_enable( DP_DSC_ENABLE, &enable_passthrough, 1); drm_dbg_dp(dev, - "Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", + "MST_DSC Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", ret); } } +#else + ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); + DRM_DEBUG_KMS("Send DSC %s to MST RX\n", enable_dsc ? "enable" : "disable"); +#endif + } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE) { ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Send DSC %s to SST RX\n", + "SST_DSC Send DSC %s to SST RX\n", enable_dsc ? "enable" : "disable"); } else if (stream->sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { ret = dm_helpers_dp_write_dpcd(ctx, stream->link, DP_DSC_ENABLE, &enable_dsc, 1); drm_dbg_dp(dev, - "Send DSC %s to DP-HDMI PCON\n", + "SST_DSC Send DSC %s to DP-HDMI PCON\n", enable_dsc ? "enable" : "disable"); } } @@ -893,7 +1131,9 @@ enum dc_edid_status dm_helpers_read_local_edid( struct dc_sink *sink) { struct amdgpu_dm_connector *aconnector = link->priv; +#ifdef HAVE_DRM_DP_SEND_REAL_EDID_CHECKSUM struct drm_connector *connector = &aconnector->base; +#endif struct i2c_adapter *ddc; int retry = 3; enum dc_edid_status edid_status; @@ -911,6 +1151,7 @@ enum dc_edid_status dm_helpers_read_local_edid( edid = drm_get_edid(&aconnector->base, ddc); +#ifdef HAVE_DRM_DP_SEND_REAL_EDID_CHECKSUM /* DP Compliance Test 4.2.2.6 */ if (link->aux_mode && connector->edid_corrupt) drm_dp_send_real_edid_checksum(&aconnector->dm_dp_aux.aux, connector->real_edid_checksum); @@ -919,6 +1160,7 @@ enum dc_edid_status dm_helpers_read_local_edid( connector->edid_corrupt = false; return EDID_BAD_CHECKSUM; } +#endif if (!edid) return EDID_NO_RESPONSE; @@ -966,7 +1208,6 @@ enum dc_edid_status dm_helpers_read_local_edid( DP_TEST_RESPONSE, &test_response.raw, sizeof(test_response)); - } return edid_status; @@ -1050,17 +1291,8 @@ void dm_helpers_free_gpu_mem( void *pvMem) { struct amdgpu_device *adev = ctx->driver_context; - struct dal_allocation *da; - - /* walk the da list in DM */ - list_for_each_entry(da, &adev->dm.da_list, list) { - if (pvMem == da->cpu_ptr) { - amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); - list_del(&da->list); - kfree(da); - break; - } - } + + dm_free_gpu_mem(adev, type, pvMem); } bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable) @@ -1297,4 +1529,4 @@ bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream { // TODO return false; -} \ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 2e9f6da1acdca..dd7117db38479 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -116,26 +116,46 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, return result; } +#ifndef HAVE_DRM_DP_MST_DETECT_PORT_PPPP +static enum drm_connector_status +dm_dp_mst_detect(struct drm_connector *connector, bool force) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_dm_connector *master = aconnector->mst_root; + + enum drm_connector_status status = + drm_dp_mst_detect_port( + 
connector, + &master->mst_mgr, + aconnector->mst_output_port); + return status; +} +#endif static void dm_dp_mst_connector_destroy(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); +#ifndef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR if (aconnector->dc_sink) { dc_link_remove_remote_sink(aconnector->dc_link, aconnector->dc_sink); dc_sink_release(aconnector->dc_sink); } +#endif kfree(aconnector->edid); drm_connector_cleanup(connector); +#if defined(HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC) drm_dp_mst_put_port_malloc(aconnector->mst_output_port); +#endif /* HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC */ kfree(aconnector); } +#if defined(HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER) static int amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) { @@ -154,7 +174,9 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) return 0; } +#endif /* HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER */ +#if defined(HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER) static void amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) { @@ -184,14 +206,20 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) aconnector->dc_sink = NULL; aconnector->edid = NULL; aconnector->dsc_aux = NULL; +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX port->passthrough_aux = NULL; +#endif } aconnector->mst_status = MST_STATUS_DEFAULT; drm_modeset_unlock(&root->mst_mgr.base.lock); } +#endif /* HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER */ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { +#ifndef HAVE_DRM_DP_MST_DETECT_PORT_PPPP + .detect = dm_dp_mst_detect, +#endif .fill_modes = drm_helper_probe_single_connector_modes, .destroy = dm_dp_mst_connector_destroy, .reset = amdgpu_dm_connector_funcs_reset, @@ -199,8 +227,12 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_set_property = amdgpu_dm_connector_atomic_set_property, .atomic_get_property = amdgpu_dm_connector_atomic_get_property, +#if defined(HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER) .late_register = amdgpu_dm_mst_connector_late_register, +#endif /* HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER */ +#if defined(HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER) .early_unregister = amdgpu_dm_mst_connector_early_unregister, +#endif /* HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER */ }; bool needs_dsc_aux_workaround(struct dc_link *link) @@ -229,6 +261,7 @@ static bool is_synaptics_cascaded_panamera(struct dc_link *link, struct drm_dp_m return false; } +#if defined(HAVE_DRM_DP_MST_DSC_AUX_FOR_PORT) static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -238,7 +271,6 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 *dsc_branch_dec_caps = NULL; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); - /* * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs * because it only check the dsc/fec caps of the "port variable" and not the dock @@ -253,7 +285,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto aconnector->dsc_aux = &aconnector->mst_root->dm_dp_aux.aux; /* synaptics cascaded MST hub case */ - if (!aconnector->dsc_aux && is_synaptics_cascaded_panamera(aconnector->dc_link, port)) + if (is_synaptics_cascaded_panamera(aconnector->dc_link, port)) aconnector->dsc_aux = port->mgr->aux; if 
(!aconnector->dsc_aux) @@ -274,6 +306,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto return true; } #endif +#endif static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnector) { @@ -387,6 +420,7 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) * plugged back with same display index, its hdcp properties * will be retrieved from hdcp_work within dm_dp_mst_get_modes */ +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE if (aconnector->dc_sink && connector->state) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -402,17 +436,19 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) hdcp_w->content_protection[connector->index]; } } +#endif if (aconnector->dc_sink) { amdgpu_dm_update_freesync_caps( connector, aconnector->edid); +#if defined(HAVE_DRM_DP_MST_DSC_AUX_FOR_PORT) #if defined(CONFIG_DRM_AMD_DC_FP) if (!validate_dsc_caps_on_connector(aconnector)) memset(&aconnector->dc_sink->dsc_caps, 0, sizeof(aconnector->dc_sink->dsc_caps)); #endif - +#endif if (!retrieve_downstream_port_device(aconnector)) memset(&aconnector->mst_downstream_port_present, 0, sizeof(aconnector->mst_downstream_port_present)); @@ -429,16 +465,22 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) static struct drm_encoder * dm_mst_atomic_best_encoder(struct drm_connector *connector, +#ifdef HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_BEST_ENCODER_ARG_DRM_ATOMIC_STATE struct drm_atomic_state *state) { struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state, connector); +#else + struct drm_connector_state *connector_state) +{ +#endif struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc); return &adev->dm.mst_encoders[acrtc->crtc_id].base; } +#ifdef HAVE_DRM_DP_MST_DETECT_PORT_PPPP static int dm_dp_mst_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) @@ -502,7 +544,9 @@ dm_dp_mst_detect(struct drm_connector *connector, aconnector->dc_sink = NULL; aconnector->edid = NULL; aconnector->dsc_aux = NULL; +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX port->passthrough_aux = NULL; +#endif amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, @@ -511,23 +555,47 @@ dm_dp_mst_detect(struct drm_connector *connector, return connection_status; } +#endif +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) static int dm_dp_mst_atomic_check(struct drm_connector *connector, struct drm_atomic_state *state) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); struct drm_dp_mst_topology_mgr *mst_mgr = &aconnector->mst_root->mst_mgr; struct drm_dp_mst_port *mst_port = aconnector->mst_output_port; +#ifndef HAVE_DRM_DP_ATOMIC_RELEASE_TIME_SLOTS + struct drm_connector_state *new_conn_state = + drm_atomic_get_new_connector_state(state, connector); + struct drm_connector_state *old_conn_state = + drm_atomic_get_old_connector_state(state, connector); + struct drm_crtc_state *new_crtc_state; + + if (!old_conn_state->crtc) + return 0; + if (new_conn_state->crtc) { + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); + if (!new_crtc_state || + !drm_atomic_crtc_needs_modeset(new_crtc_state) || + new_crtc_state->enable) + return 0; + } +#endif return drm_dp_atomic_release_time_slots(state, mst_mgr, mst_port); } +#endif static 
const struct drm_connector_helper_funcs dm_dp_mst_connector_helper_funcs = { .get_modes = dm_dp_mst_get_modes, .mode_valid = amdgpu_dm_connector_mode_valid, .atomic_best_encoder = dm_mst_atomic_best_encoder, +#ifdef HAVE_DRM_DP_MST_DETECT_PORT_PPPP .detect_ctx = dm_dp_mst_detect, +#endif +#if defined(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE) .atomic_check = dm_dp_mst_atomic_check, +#endif }; static void amdgpu_dm_encoder_destroy(struct drm_encoder *encoder) @@ -578,6 +646,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, if (!aconnector) return NULL; + DRM_DEBUG_DRIVER("%s: Create aconnector 0x%p for port 0x%p\n", __func__, aconnector, port); + connector = &aconnector->base; aconnector->mst_output_port = port; aconnector->mst_root = master; @@ -622,10 +692,12 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, &connector->base, dev->mode_config.tile_property, 0); + +#ifdef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY connector->colorspace_property = master->base.colorspace_property; if (connector->colorspace_property) drm_connector_attach_colorspace_property(connector); - +#endif drm_connector_set_path_property(connector, pathprop); /* @@ -634,7 +706,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); +#if defined(HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC) drm_dp_mst_get_port_malloc(port); +#endif /* HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC */ return connector; } @@ -707,6 +781,7 @@ void dm_handle_mst_sideband_msg_ready_event( /* handle MST irq */ if (aconnector->mst_mgr.mst_state) +#ifdef HAVE_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr, esi, ack, @@ -730,6 +805,29 @@ void dm_handle_mst_sideband_msg_ready_event( } drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr); +#else + drm_dp_mst_hpd_irq( + &aconnector->mst_mgr, + esi, + &new_irq_handled); + + if (new_irq_handled) { + /* ACK at DPCD to notify down stream */ + const int ack_dpcd_bytes_to_write = + dpcd_bytes_to_read - 1; + + for (retry = 0; retry < 3; retry++) { + u8 wret; + + wret = drm_dp_dpcd_write( + &aconnector->dm_dp_aux.aux, + dpcd_addr + 1, + &esi[1], + ack_dpcd_bytes_to_write); + if (wret == ack_dpcd_bytes_to_write) + break; + } +#endif new_irq_handled = false; } else { @@ -743,14 +841,75 @@ void dm_handle_mst_sideband_msg_ready_event( DRM_DEBUG_DRIVER("Loop exceeded max iterations\n"); } +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ static void dm_handle_mst_down_rep_msg_ready(struct drm_dp_mst_topology_mgr *mgr) { dm_handle_mst_sideband_msg_ready_event(mgr, DOWN_REP_MSG_RDY_EVENT); } +#endif + +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR +static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, + struct drm_connector *connector) +{ + struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr); + struct drm_device *dev = master->base.dev; + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n", + aconnector, connector->base.id, aconnector->mst_root); + + if (aconnector->dc_sink) { + amdgpu_dm_update_freesync_caps(connector, NULL); + dc_link_remove_remote_sink(aconnector->dc_link, + aconnector->dc_sink); + dc_sink_release(aconnector->dc_sink); + aconnector->dc_sink = NULL; + mutex_lock(&mgr->lock); + if (!mgr->mst_state) + aconnector->dc_link->cur_link_settings.lane_count = 
0; + mutex_unlock(&mgr->lock); + } + drm_connector_unregister(connector); + drm_connector_put(connector); +} +#endif + +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG) +static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) +{ + struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr); + struct drm_device *dev = master->base.dev; + + drm_kms_helper_hotplug_event(dev); +} +#endif + +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR +static void dm_dp_mst_register_connector(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + + drm_connector_register(connector); +} +#endif static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { .add_connector = dm_dp_add_mst_connector, +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR + .destroy_connector = dm_dp_destroy_mst_connector, +#endif +#if defined(HAVE_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG) + .hotplug = dm_dp_mst_hotplug, +#endif +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR + .register_connector = dm_dp_mst_register_connector, +#endif +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ .poll_hpd_irq = dm_handle_mst_down_rep_msg_ready, +#endif }; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, @@ -763,7 +922,9 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, kasprintf(GFP_KERNEL, "AMDGPU DM aux hw bus %d", link_index); aconnector->dm_dp_aux.aux.transfer = dm_dp_aux_transfer; +#ifdef HAVE_DRM_DP_AUX_DRM_DEV aconnector->dm_dp_aux.aux.drm_dev = dm->ddev; +#endif aconnector->dm_dp_aux.ddc_service = aconnector->dc_link->ddc; drm_dp_aux_init(&aconnector->dm_dp_aux.aux); @@ -776,7 +937,12 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, dc_link_dp_get_max_link_enc_cap(aconnector->dc_link, &max_link_enc_cap); aconnector->mst_mgr.cbs = &dm_mst_cbs; drm_dp_mst_topology_mgr_init(&aconnector->mst_mgr, adev_to_drm(dm->adev), - &aconnector->dm_dp_aux.aux, 16, 4, aconnector->connector_id); + &aconnector->dm_dp_aux.aux, 16, 4, +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_MGR_INIT_MAX_LANE_COUNT + max_link_enc_cap.lane_count, + drm_dp_bw_code_to_link_rate(max_link_enc_cap.link_rate), +#endif + aconnector->connector_id); drm_connector_attach_dp_subconnector_property(&aconnector->base); } @@ -790,6 +956,7 @@ int dm_mst_get_pbn_divider(struct dc_link *link) dc_link_get_link_cap(link)) / (8 * 1000 * 54); } +#if defined(HAVE_DRM_DP_MST_ATOMIC_CHECK) struct dsc_mst_fairness_params { struct dc_crtc_timing *timing; struct dc_sink *sink; @@ -802,6 +969,7 @@ struct dsc_mst_fairness_params { uint32_t bpp_overwrite; struct amdgpu_dm_connector *aconnector; }; +#endif #if defined(CONFIG_DRM_AMD_DC_FP) static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) @@ -826,6 +994,7 @@ static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } +#if defined(HAVE_DRM_DP_MST_ATOMIC_CHECK) static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params, struct dsc_mst_fairness_vars *vars, int count, @@ -839,7 +1008,11 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p drm_connector = &params[i].aconnector->base; dc_dsc_get_default_config_option(params[i].sink->ctx->dc, &dsc_options); +#ifdef HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP dsc_options.max_target_bpp_limit_override_x16 = drm_connector->display_info.max_dsc_bpp * 16; +#else +
dsc_options.max_target_bpp_limit_override_x16 = params[i].sink->edid_caps.panel_patch.max_dsc_target_bpp_limit * 16; +#endif memset(¶ms[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg)); if (vars[i + k].dsc_enabled && dc_dsc_compute_config( @@ -872,11 +1045,11 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p if (params[i].sink) { if (params[i].sink->sink_signal != SIGNAL_TYPE_VIRTUAL && params[i].sink->sink_signal != SIGNAL_TYPE_NONE) - DRM_DEBUG_DRIVER("%s i=%d dispname=%s\n", __func__, i, + DRM_DEBUG_DRIVER("MST_DSC %s i=%d dispname=%s\n", __func__, i, params[i].sink->edid_caps.display_name); } - DRM_DEBUG_DRIVER("dsc=%d bits_per_pixel=%d pbn=%d\n", + DRM_DEBUG_DRIVER("MST_DSC dsc=%d bits_per_pixel=%d pbn=%d\n", params[i].timing->flags.DSC, params[i].timing->dsc_cfg.bits_per_pixel, vars[i + k].pbn); @@ -892,7 +1065,11 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) struct dc_dsc_config_options dsc_options = {0}; dc_dsc_get_default_config_option(param.sink->ctx->dc, &dsc_options); +#ifdef HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP dsc_options.max_target_bpp_limit_override_x16 = drm_connector->display_info.max_dsc_bpp * 16; +#else + dsc_options.max_target_bpp_limit_override_x16 = param.sink->edid_caps.panel_patch.max_dsc_target_bpp_limit * 16; +#endif kbps = div_u64((u64)pbn * 994 * 8 * 54, 64); dc_dsc_compute_config( @@ -920,11 +1097,18 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int min_initial_slack; int next_index; int remaining_to_increase = 0; +#if !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT) && !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION) + int pbn_per_timeslot; +#endif int link_timeslots_used; int fair_pbn_alloc; int ret = 0; uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); +#if !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT) && !defined(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION) + pbn_per_timeslot = dm_mst_get_pbn_divider(dc_link); +#endif + for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = @@ -955,17 +1139,36 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, link_timeslots_used = 0; for (i = 0; i < count; i++) - link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, dfixed_trunc(mst_state->pbn_div)); + link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION + dfixed_trunc(mst_state->pbn_div) +#elif HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT + mst_state->pbn_div +#else + pbn_per_timeslot +#endif + ); fair_pbn_alloc = - (63 - link_timeslots_used) / remaining_to_increase * dfixed_trunc(mst_state->pbn_div); + (63 - link_timeslots_used) / remaining_to_increase * +#ifdef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION + dfixed_trunc(mst_state->pbn_div); +#elif HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT + mst_state->pbn_div; +#else + pbn_per_timeslot; +#endif if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , pbn_per_timeslot +#endif + ); if (ret < 0) return ret; @@ -977,7 +1180,11 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , 
pbn_per_timeslot +#endif + ); if (ret < 0) return ret; } @@ -986,7 +1193,11 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , pbn_per_timeslot +#endif + ); if (ret < 0) return ret; @@ -998,7 +1209,11 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , pbn_per_timeslot +#endif + ); if (ret < 0) return ret; } @@ -1025,6 +1240,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int remaining_to_try = 0; int ret; uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + int var_pbn; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1054,26 +1270,45 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; + DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index); + var_pbn = vars[next_index].pbn; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); - if (ret < 0) + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(dc_link) +#endif + ); + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); + vars[next_index].pbn = var_pbn; return ret; + } ret = drm_dp_mst_atomic_check(state); if (ret == 0) { + DRM_DEBUG_DRIVER("MST_DSC index #%d, greedily disable dsc\n", next_index); vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); + DRM_DEBUG_DRIVER("MST_DSC index #%d, restore optimized pbn value\n", next_index); + vars[next_index].pbn = var_pbn; ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, - vars[next_index].pbn); - if (ret < 0) + vars[next_index].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(dc_link) +#endif + ); + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); return ret; + } } tried[next_index] = true; @@ -1082,6 +1317,15 @@ static int try_disable_dsc(struct drm_atomic_state *state, return 0; } +static void log_dsc_params(int count, struct dsc_mst_fairness_vars *vars, int k) +{ + int i; + + for (i = 0; i < count; i++) + DRM_DEBUG_DRIVER("MST_DSC DSC params: stream #%d --- dsc_enabled = %d, bpp_x16 = %d, pbn = %d\n", + i, vars[i + k].dsc_enabled, vars[i + k].bpp_x16, vars[i + k].pbn); +} + static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, struct dc_state *dc_state, struct dc_link *dc_link, @@ -1097,6 +1341,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int i, k, ret; bool debugfs_overwrite = false; uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + struct drm_connector_state *new_conn_state; memset(params, 0, sizeof(params)); @@ -1104,6 +1349,7 @@ static int compute_mst_dsc_configs_for_link(struct 
drm_atomic_state *state, return PTR_ERR(mst_state); /* Set up params */ + DRM_DEBUG_DRIVER("%s: MST_DSC Try to set up params from %d streams\n", __func__, dc_state->stream_count); for (i = 0; i < dc_state->stream_count; i++) { struct dc_dsc_policy dsc_policy = {0}; @@ -1119,6 +1365,14 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (!aconnector->mst_output_port) continue; + new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); + + if (!new_conn_state) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC Skip the stream 0x%p with invalid new_conn_state\n", + __func__, __LINE__, stream); + continue; + } + stream->timing.flags.DSC = 0; params[count].timing = &stream->timing; @@ -1132,7 +1386,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel; params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported; - dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); if (!dc_dsc_compute_bandwidth_range( stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, @@ -1145,9 +1399,14 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].bw_range.stream_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing, dc_link_get_highest_encoding_format(dc_link)); + DRM_DEBUG_DRIVER("MST_DSC #%d stream 0x%p - max_kbps = %u, min_kbps = %u, uncompressed_kbps = %u\n", + count, stream, params[count].bw_range.max_kbps, params[count].bw_range.min_kbps, + params[count].bw_range.stream_kbps); count++; } + DRM_DEBUG_DRIVER("%s: MST_DSC Params set up for %d streams\n", __func__, count); + if (count == 0) { ASSERT(0); return 0; @@ -1159,13 +1418,18 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, *link_vars_start_index += count; /* Try no compression */ + DRM_DEBUG_DRIVER("MST_DSC Try no compression\n"); for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, - vars[i + k].pbn); + vars[i + k].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(dc_link) +#endif + ); if (ret < 0) return ret; } @@ -1177,14 +1441,21 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, return ret; } + log_dsc_params(count, vars, k); + /* Try max compression */ + DRM_DEBUG_DRIVER("MST_DSC Try max compression\n"); for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn); + params[i].port, vars[i + k].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(dc_link) +#endif + ); if (ret < 0) return ret; } else { @@ -1192,7 +1463,11 @@ static int compute_mst_dsc_configs_for_link(struct 
drm_atomic_state *state, vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn); + params[i].port, vars[i + k].pbn +#ifndef HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS + , dm_mst_get_pbn_divider(dc_link) +#endif + ); if (ret < 0) return ret; } @@ -1201,14 +1476,26 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret != 0) return ret; + log_dsc_params(count, vars, k); + /* Optimize degree of compression */ + DRM_DEBUG_DRIVER("MST_DSC Try optimize compression\n"); ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("MST_DSC Failed to optimize compression\n"); return ret; + } + + log_dsc_params(count, vars, k); + DRM_DEBUG_DRIVER("MST_DSC Try disable compression\n"); ret = try_disable_dsc(state, dc_link, params, vars, count, k); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("MST_DSC Failed to disable compression\n"); return ret; + } + + log_dsc_params(count, vars, k); set_dsc_configs_from_fairness_vars(params, vars, count, k); @@ -1230,17 +1517,19 @@ static bool is_dsc_need_re_compute( /* only check phy used by dsc mst branch */ if (dc_link->type != dc_connection_mst_branch) - return false; + goto out; /* add a check for older MST DSC with no virtual DPCDs */ if (needs_dsc_aux_workaround(dc_link) && (!(dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT || dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT))) - return false; + goto out; for (i = 0; i < MAX_PIPES; i++) stream_on_link[i] = NULL; + DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in new dc_state\n", __func__, dc_state->stream_count); + /* check if there is mode change in new request */ for (i = 0; i < dc_state->stream_count; i++) { struct drm_crtc_state *new_crtc_state; @@ -1250,20 +1539,25 @@ static bool is_dsc_need_re_compute( if (!stream) continue; + DRM_DEBUG_DRIVER("%s:%d MST_DSC checking #%d stream 0x%p\n", __func__, __LINE__, i, stream); + /* check if stream using the same link for mst */ if (stream->link != dc_link) continue; aconnector = (struct amdgpu_dm_connector *) stream->dm_stream_context; - if (!aconnector || !aconnector->dsc_aux) + if (!aconnector) continue; stream_on_link[new_stream_on_link_num] = aconnector; new_stream_on_link_num++; new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base); - if (!new_conn_state) + if (!new_conn_state) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_conn_state for stream 0x%p, aconnector 0x%p\n", + __func__, __LINE__, stream, aconnector); continue; + } if (IS_ERR(new_conn_state)) continue; @@ -1272,21 +1566,36 @@ static bool is_dsc_need_re_compute( continue; new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); - if (!new_crtc_state) + if (!new_crtc_state) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no new_crtc_state for crtc of stream 0x%p, aconnector 0x%p\n", + __func__, __LINE__, stream, aconnector); continue; + } if (IS_ERR(new_crtc_state)) continue; if (new_crtc_state->enable && new_crtc_state->active) { if (new_crtc_state->mode_changed || new_crtc_state->active_changed || - new_crtc_state->connectors_changed) - return true; + new_crtc_state->connectors_changed) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required." 
+ "stream 0x%p in new dc_state\n", + __func__, __LINE__, stream); + is_dsc_need_re_compute = true; + goto out; + } } } - if (new_stream_on_link_num == 0) - return false; + if (new_stream_on_link_num == 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC no mode change request for streams in new dc_state\n", + __func__, __LINE__); + is_dsc_need_re_compute = false; + goto out; + } + + DRM_DEBUG_DRIVER("%s: MST_DSC check on %d streams in current dc_state\n", + __func__, dc->current_state->stream_count); /* check current_state if there stream on link but it is not in * new request state @@ -1310,11 +1619,18 @@ static bool is_dsc_need_re_compute( if (j == new_stream_on_link_num) { /* not in new state */ + DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required." + "stream 0x%p in current dc_state but not in new dc_state\n", + __func__, __LINE__, stream); is_dsc_need_re_compute = true; break; } } +out: + DRM_DEBUG_DRIVER("%s: MST_DSC dsc recompute %s\n", + __func__, is_dsc_need_re_compute ? "required" : "not required"); + return is_dsc_need_re_compute; } @@ -1343,6 +1659,9 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + DRM_DEBUG_DRIVER("%s: MST_DSC compute mst dsc configs for stream 0x%p, aconnector 0x%p\n", + __func__, stream, aconnector); + if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port) continue; @@ -1375,8 +1694,11 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, stream = dc_state->streams[i]; if (stream->timing.flags.DSC == 1) - if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) + if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC Failed to request dsc hw resource for stream 0x%p\n", + __func__, __LINE__, stream); return -EINVAL; + } } return ret; @@ -1405,6 +1727,9 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + DRM_DEBUG_DRIVER("MST_DSC pre compute mst dsc configs for #%d stream 0x%p, aconnector 0x%p\n", + i, stream, aconnector); + if (!aconnector || !aconnector->dc_sink || !aconnector->mst_output_port) continue; @@ -1494,12 +1819,12 @@ int pre_validate_dsc(struct drm_atomic_state *state, int ret = 0; if (!is_dsc_precompute_needed(state)) { - DRM_INFO_ONCE("DSC precompute is not needed.\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dsc precompute is not needed\n", __func__, __LINE__); return 0; } ret = dm_atomic_get_state(state, dm_state_ptr); if (ret != 0) { - DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dm_atomic_get_state() failed\n", __func__, __LINE__); return ret; } dm_state = *dm_state_ptr; @@ -1553,7 +1878,8 @@ int pre_validate_dsc(struct drm_atomic_state *state, ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); if (ret != 0) { - DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); + DRM_INFO_ONCE("%s:%d MST_DSC dsc pre_compute_mst_dsc_configs_for_state() failed\n", + __func__, __LINE__); ret = -EINVAL; goto clean_exit; } @@ -1567,12 +1893,15 @@ int pre_validate_dsc(struct drm_atomic_state *state, if (local_dc_state->streams[i] && dc_is_timing_changed(stream, local_dc_state->streams[i])) { - DRM_INFO_ONCE("crtc[%d] needs mode_changed\n", i); + DRM_INFO_ONCE("%s:%d MST_DSC crtc[%d] needs mode_change\n", __func__, __LINE__, i); } else { int ind = find_crtc_index_in_state_by_stream(state, 
stream); - if (ind >= 0) + if (ind >= 0) { + DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n", + __func__, __LINE__, stream); state->crtcs[ind].new_state->mode_changed = 0; + } } } clean_exit: @@ -1588,6 +1917,7 @@ int pre_validate_dsc(struct drm_atomic_state *state, return ret; } +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX static unsigned int kbps_from_pbn(unsigned int pbn) { unsigned int kbps = pbn; @@ -1605,7 +1935,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, { struct dc_dsc_policy dsc_policy = {0}; - dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link)); dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], stream->sink->ctx->dc->debug.dsc_min_slice_height_override, dsc_policy.min_target_bpp * 16, @@ -1616,6 +1946,8 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; } #endif +#endif /* HAVE_DRM_DP_MST_ATOMIC_CHECK */ +#endif #if defined(CONFIG_DRM_AMD_DC_FP) static bool dp_get_link_current_set_bw(struct drm_dp_aux *aux, uint32_t *cur_link_bw) @@ -1675,6 +2007,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( { #if defined(CONFIG_DRM_AMD_DC_FP) int branch_max_throughput_mps = 0; +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) && defined(HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP) struct dc_link_settings cur_link_settings; uint32_t end_to_end_bw_in_kbps = 0; uint32_t root_link_bw_in_kbps = 0; @@ -1697,7 +2030,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); if (stream_kbps <= end_to_end_bw_in_kbps) { - DRM_DEBUG_DRIVER("No DSC needed. End-to-end bw sufficient."); + DRM_DEBUG_DRIVER("MST_DSC no dsc required. End-to-end bw sufficient\n"); return DC_OK; } @@ -1710,7 +2043,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( /*capable of dsc passthrough. dsc bitstream along the entire path*/ if (aconnector->mst_output_port->passthrough_aux) { if (bw_range.min_kbps > end_to_end_bw_in_kbps) { - DRM_DEBUG_DRIVER("DSC passthrough. Max dsc compression can't fit into end-to-end bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc passthrough and decode at endpoint. " + "Max dsc compression bw can't fit into end-to-end bw\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } else { @@ -1721,7 +2055,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( /*Get last DP link BW capability*/ if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) { if (stream_kbps > end_link_bw) { - DRM_DEBUG_DRIVER("DSC decode at last link. Mode required bw can't fit into available bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. " + "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } @@ -1734,7 +2069,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn); virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps); if (bw_range.min_kbps > virtual_channel_bw_in_kbps) { - DRM_DEBUG_DRIVER("DSC decode at last link. Max dsc compression can't fit into MST available bw\n"); + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. "
+ "Max dsc compression can't fit into MST available bw\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } @@ -1751,9 +2087,9 @@ enum dc_status dm_dp_mst_is_port_support_mode( dc_link_get_highest_encoding_format(stream->link), &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("Require dsc and dsc config found\n"); + DRM_DEBUG_DRIVER("MST_DSC require dsc and dsc config found\n"); } else { - DRM_DEBUG_DRIVER("Require dsc but can't find appropriate dsc config\n"); + DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find appropriate dsc config\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } @@ -1775,13 +2111,50 @@ enum dc_status dm_dp_mst_is_port_support_mode( if (branch_max_throughput_mps != 0 && ((stream->timing.pix_clk_100hz / 10) > branch_max_throughput_mps * 1000)) { - DRM_DEBUG_DRIVER("DSC is required but max throughput mps fails"); + DRM_DEBUG_DRIVER("MST_DSC require dsc but max throughput mps fails\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } } else { - DRM_DEBUG_DRIVER("DSC is required but can't find common dsc config."); + DRM_DEBUG_DRIVER("MST_DSC require dsc but can't find common dsc config\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } +#else + int pbn; + /* Check if mode could be supported within max slot + * number of current mst link and full_pbn of mst links. + */ + int pbn_div, slot_num, max_slot_num; + enum dc_link_encoding_format link_encoding; + uint16_t fec_overhead_multiplier_x1000 = + get_fec_overhead_multiplier(stream->link); + uint32_t stream_kbps = dc_bandwidth_in_kbps_from_timing( + &stream->timing, + dc_link_get_highest_encoding_format(stream->link)); + + pbn = kbps_to_peak_pbn(stream_kbps, fec_overhead_multiplier_x1000); + pbn_div = dm_mst_get_pbn_divider(stream->link); + slot_num = DIV_ROUND_UP(pbn, pbn_div); + + link_encoding = dc_link_get_highest_encoding_format(stream->link); + if (link_encoding == DC_LINK_ENCODING_DP_8b_10b) + max_slot_num = 63; + else if (link_encoding == DC_LINK_ENCODING_DP_128b_132b) + max_slot_num = 64; + else { + DRM_DEBUG_DRIVER("Invalid link encoding format\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } + + if (slot_num > max_slot_num || +#ifdef HAVE_DRM_DP_MST_PORT_FULL_PBN + pbn > aconnector->mst_output_port->full_pbn) { +#else + pbn > aconnector->mst_output_port->available_pbn) { +#endif + DRM_DEBUG_DRIVER("Mode can not be supported within mst links!"); return DC_FAIL_BANDWIDTH_VALIDATE; } +#endif #endif return DC_OK; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 1ff469ef51af1..149672e44a93d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "amdgpu.h" @@ -52,10 +53,12 @@ static const uint32_t rgb_formats[] = { DRM_FORMAT_XBGR2101010, DRM_FORMAT_ARGB2101010, DRM_FORMAT_ABGR2101010, +#ifdef DRM_FORMAT_XRGB16161616 DRM_FORMAT_XRGB16161616, DRM_FORMAT_XBGR16161616, DRM_FORMAT_ARGB16161616, DRM_FORMAT_ABGR16161616, +#endif DRM_FORMAT_XBGR8888, DRM_FORMAT_ABGR8888, DRM_FORMAT_RGB565, @@ -90,10 +93,12 @@ enum dm_micro_swizzle { MICRO_SWIZZLE_R = 3 }; +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd) { return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); } +#endif void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state, bool *per_pixel_alpha, bool 
*pre_multiplied_alpha, @@ -300,7 +305,62 @@ static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev, return 0; } +static void +fill_dcc_params_from_flags(const struct amdgpu_framebuffer *afb, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + const uint64_t flags, bool force_disable_dcc) +{ + uint64_t dcc_address; + uint64_t plane_address = afb->address + afb->base.offsets[0]; + uint32_t offset = AMDGPU_TILING_GET(flags, DCC_OFFSET_256B); + uint32_t i64b = AMDGPU_TILING_GET(flags, DCC_INDEPENDENT_64B) != 0; + + if (!offset || force_disable_dcc) + return; + + dcc->enable = 1; + dcc->meta_pitch = AMDGPU_TILING_GET(flags, DCC_PITCH_MAX) + 1; + dcc->independent_64b_blks = i64b; + + if (dcc->independent_64b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + + dcc_address = plane_address + (uint64_t)offset * 256; + address->grph.meta_addr.low_part = lower_32_bits(dcc_address); + address->grph.meta_addr.high_part = upper_32_bits(dcc_address); +} + +static int +fill_gfx9_plane_attributes_from_flags(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const struct plane_size *plane_size, + union dc_tiling_info *tiling_info, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + uint64_t tiling_flags, + bool force_disable_dcc) +{ + int ret; + amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info); + + tiling_info->gfx9.swizzle = + AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); + + fill_dcc_params_from_flags(afb, dcc, address, tiling_flags, force_disable_dcc); + ret = amdgpu_dm_plane_validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); + if (ret) + return ret; + + return 0; +} + +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, const struct amdgpu_framebuffer *afb, const enum surface_pixel_format format, @@ -351,6 +411,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg return ret; } +#endif static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amdgpu_device *adev, const struct amdgpu_framebuffer *afb, @@ -793,8 +854,10 @@ static int amdgpu_dm_plane_get_plane_formats(const struct drm_plane *plane, if (plane_cap && plane_cap->pixel_format_support.nv12) formats[num_formats++] = DRM_FORMAT_NV12; + if (plane_cap && plane_cap->pixel_format_support.p010) formats[num_formats++] = DRM_FORMAT_P010; + if (plane_cap && plane_cap->pixel_format_support.fp16) { formats[num_formats++] = DRM_FORMAT_XRGB16161616F; formats[num_formats++] = DRM_FORMAT_ARGB16161616F; @@ -904,13 +967,26 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, if (ret) return ret; } else if (adev->family >= AMDGPU_FAMILY_AI) { - ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, - rotation, plane_size, - tiling_info, dcc, - address, - force_disable_dcc); - if (ret) - return ret; +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED + if (afb->base.flags & DRM_MODE_FB_MODIFIERS) { + ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, + rotation, plane_size, + tiling_info, dcc, + address, + force_disable_dcc); + if (ret) + return ret; + } else { +#endif + ret = fill_gfx9_plane_attributes_from_flags(adev, afb, format, rotation, + plane_size, tiling_info, dcc, + address, 
tiling_flags, + force_disable_dcc); + if (ret) + return ret; +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED + } +#endif } else { amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); } @@ -935,17 +1011,21 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); return r; } - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&rbo->tbo), 1); if (r) { dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); goto error_unlock; @@ -956,6 +1036,7 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, else domain = AMDGPU_GEM_DOMAIN_VRAM; + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) @@ -969,9 +1050,11 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, goto error_unpin; } +#ifdef HAVE_DRM_GEM_PLANE_HELPER_PREPARE_FB r = drm_gem_plane_helper_prepare_fb(plane, new_state); if (unlikely(r != 0)) goto error_unpin; +#endif amdgpu_bo_unreserve(rbo); @@ -1023,7 +1106,7 @@ static void amdgpu_dm_plane_helper_cleanup_fb(struct drm_plane *plane, if (!old_state->fb) return; - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + rbo = gem_to_amdgpu_bo(drm_gem_fb_get_obj(old_state->fb, 0)); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); @@ -1157,8 +1240,8 @@ int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev, */ if (((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) || (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) && - (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && - (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && + (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) return -EINVAL; scaling_info->src_rect.width = state->src_w >> 16; @@ -1215,10 +1298,18 @@ int amdgpu_dm_plane_fill_dc_scaling_info(struct amdgpu_device *adev, } static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane, - struct drm_atomic_state *state) +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS + struct drm_atomic_state *state) +#else + struct drm_plane_state *state) +#endif { +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); +#else + struct drm_plane_state *new_plane_state = state; +#endif struct amdgpu_device *adev = drm_to_adev(plane->dev); struct dc *dc = adev->dm.dc; struct dm_plane_state *dm_plane_state; @@ -1233,9 +1324,13 @@ static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane, if (!dm_plane_state->dc_state) return 0; - new_crtc_state = - drm_atomic_get_new_crtc_state(state, - new_plane_state->crtc); + new_crtc_state = +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS + drm_atomic_get_new_crtc_state(state, new_plane_state->crtc); +#else + drm_atomic_get_new_crtc_state(state->state, state->crtc); +#endif + if (!new_crtc_state) return -EINVAL; @@ 
-1254,7 +1349,11 @@ static int amdgpu_dm_plane_atomic_check(struct drm_plane *plane, } static int amdgpu_dm_plane_atomic_async_check(struct drm_plane *plane, - struct drm_atomic_state *state) +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS + struct drm_atomic_state *state) +#else + struct drm_plane_state *state) +#endif { struct drm_crtc_state *new_crtc_state; struct drm_plane_state *new_plane_state; @@ -1264,8 +1363,12 @@ static int amdgpu_dm_plane_atomic_async_check(struct drm_plane *plane, if (plane->type != DRM_PLANE_TYPE_CURSOR) return -EINVAL; +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS new_plane_state = drm_atomic_get_new_plane_state(state, plane); new_crtc_state = drm_atomic_get_new_crtc_state(state, new_plane_state->crtc); +#else + new_crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); +#endif dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); /* Reject overlay cursors for now*/ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) @@ -1278,6 +1381,7 @@ int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc struct dc_cursor_position *position) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(plane->dev); int x, y; int xorigin = 0, yorigin = 0; @@ -1309,12 +1413,14 @@ int amdgpu_dm_plane_get_cursor_position(struct drm_plane *plane, struct drm_crtc y = 0; } position->enable = true; - position->translate_by_source = true; position->x = x; position->y = y; position->x_hotspot = xorigin; position->y_hotspot = yorigin; + if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(4, 0, 1)) + position->translate_by_source = true; + return 0; } @@ -1389,12 +1495,21 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane, } static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, - struct drm_atomic_state *state) +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS + struct drm_atomic_state *state) +#else + struct drm_plane_state *new_state) +#endif { +#ifdef HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane); +#else + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(new_state->state, plane); +#endif trace_amdgpu_dm_atomic_update_cursor(new_state); @@ -1411,7 +1526,6 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, amdgpu_dm_plane_handle_cursor_update(plane, old_state); } - static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .prepare_fb = amdgpu_dm_plane_helper_prepare_fb, .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb, @@ -1480,6 +1594,7 @@ static struct drm_plane_state *amdgpu_dm_plane_drm_plane_duplicate_state(struct return &dm_plane_state->base; } +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -1541,6 +1656,7 @@ static bool amdgpu_dm_plane_format_mod_supported(struct drm_plane *plane, return true; } +#endif static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) @@ -1754,7 +1870,9 @@ static const struct drm_plane_funcs dm_plane_funcs = { .reset = amdgpu_dm_plane_drm_plane_reset, .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, 
.atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED .format_mod_supported = amdgpu_dm_plane_format_mod_supported, +#endif #ifdef AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, @@ -1776,12 +1894,16 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, num_formats = amdgpu_dm_plane_get_plane_formats(plane, plane_cap, formats, ARRAY_SIZE(formats)); +#ifdef HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED res = amdgpu_dm_plane_get_plane_modifiers(dm->adev, plane->type, &modifiers); if (res) return res; +#endif +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED if (modifiers == NULL) adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; +#endif res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, &dm_plane_funcs, formats, num_formats, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 4686d4b0cbad2..0f969a7eb5aa5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -40,6 +40,7 @@ #include "dc/inc/hw/optc.h" #include "dc/inc/core_types.h" +#include DECLARE_EVENT_CLASS(amdgpu_dc_reg_template, TP_PROTO(unsigned long *count, uint32_t reg, uint32_t value), @@ -87,7 +88,7 @@ TRACE_EVENT(amdgpu_dc_performance, __entry->writes = write_count; __entry->read_delta = read_count - *last_read; __entry->write_delta = write_count - *last_write; - __assign_str(func); + __amdkcl_assign_str(func, func); __entry->line = line; *last_read = read_count; *last_write = write_count; @@ -112,13 +113,19 @@ TRACE_EVENT(amdgpu_dm_connector_atomic_check, __field(uint32_t, crtc_id) __field(uint32_t, best_encoder_id) __field(enum drm_link_status, link_status) +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE __field(bool, self_refresh_aware) +#endif __field(enum hdmi_picture_aspect, picture_aspect_ratio) __field(unsigned int, content_type) +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE __field(unsigned int, hdcp_content_type) +#endif __field(unsigned int, content_protection) __field(unsigned int, scaling_mode) +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE __field(u32, colorspace) +#endif __field(u8, max_requested_bpc) __field(u8, max_bpc) ), @@ -132,28 +139,52 @@ TRACE_EVENT(amdgpu_dm_connector_atomic_check, __entry->best_encoder_id = state->best_encoder ? 
state->best_encoder->base.id : 0; __entry->link_status = state->link_status; +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE __entry->self_refresh_aware = state->self_refresh_aware; +#endif __entry->picture_aspect_ratio = state->picture_aspect_ratio; __entry->content_type = state->content_type; +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE __entry->hdcp_content_type = state->hdcp_content_type; +#endif __entry->content_protection = state->content_protection; __entry->scaling_mode = state->scaling_mode; +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE __entry->colorspace = state->colorspace; +#endif __entry->max_requested_bpc = state->max_requested_bpc; __entry->max_bpc = state->max_bpc; ), TP_printk("conn_id=%u conn_state=%p state=%p commit=%p crtc_id=%u " - "best_encoder_id=%u link_status=%d self_refresh_aware=%d " + "best_encoder_id=%u link_status=%d " +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE + "self_refresh_aware=%d " +#endif "picture_aspect_ratio=%d content_type=%u " - "hdcp_content_type=%u content_protection=%u scaling_mode=%u " - "colorspace=%u max_requested_bpc=%u max_bpc=%u", +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE + "hdcp_content_type=%u " +#endif + "content_protection=%u scaling_mode=%u " +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE + "colorspace=%u " +#endif + "max_requested_bpc=%u max_bpc=%u", __entry->conn_id, __entry->conn_state, __entry->state, __entry->commit, __entry->crtc_id, __entry->best_encoder_id, - __entry->link_status, __entry->self_refresh_aware, + __entry->link_status, +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE + __entry->self_refresh_aware, +#endif __entry->picture_aspect_ratio, __entry->content_type, - __entry->hdcp_content_type, __entry->content_protection, - __entry->scaling_mode, __entry->colorspace, +#ifdef HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE + __entry->hdcp_content_type, +#endif + __entry->content_protection, + __entry->scaling_mode, +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE + __entry->colorspace, +#endif __entry->max_requested_bpc, __entry->max_bpc) ); @@ -175,9 +206,13 @@ TRACE_EVENT(amdgpu_dm_crtc_atomic_check, __field(bool, zpos_changed) __field(bool, color_mgmt_changed) __field(bool, no_vblank) +#ifdef HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP __field(bool, async_flip) +#endif __field(bool, vrr_enabled) +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE __field(bool, self_refresh_active) +#endif __field(u32, plane_mask) __field(u32, connector_mask) __field(u32, encoder_mask) @@ -197,9 +232,13 @@ TRACE_EVENT(amdgpu_dm_crtc_atomic_check, __entry->zpos_changed = state->zpos_changed; __entry->color_mgmt_changed = state->color_mgmt_changed; __entry->no_vblank = state->no_vblank; +#ifdef HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP __entry->async_flip = state->async_flip; +#endif __entry->vrr_enabled = state->vrr_enabled; +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE __entry->self_refresh_active = state->self_refresh_active; +#endif __entry->plane_mask = state->plane_mask; __entry->connector_mask = state->connector_mask; __entry->encoder_mask = state->encoder_mask; @@ -207,16 +246,29 @@ TRACE_EVENT(amdgpu_dm_crtc_atomic_check, TP_printk("crtc_id=%u crtc_state=%p state=%p commit=%p changed(" "planes=%d mode=%d active=%d conn=%d zpos=%d color_mgmt=%d) " - "state(enable=%d active=%d async_flip=%d vrr_enabled=%d " - "self_refresh_active=%d no_vblank=%d) mask(plane=%x conn=%x " + "state(enable=%d active=%d " +#ifdef HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP + "async_flip=%d 
" +#endif + "vrr_enabled=%d " +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE + "self_refresh_active=%d " +#endif + "no_vblank=%d) mask(plane=%x conn=%x " "enc=%x)", __entry->crtc_id, __entry->crtc_state, __entry->state, __entry->commit, __entry->planes_changed, __entry->mode_changed, __entry->active_changed, __entry->connectors_changed, __entry->zpos_changed, __entry->color_mgmt_changed, __entry->enable, __entry->active, - __entry->async_flip, __entry->vrr_enabled, - __entry->self_refresh_active, __entry->no_vblank, +#ifdef HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP + __entry->async_flip, +#endif + __entry->vrr_enabled, +#ifdef HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE + __entry->self_refresh_active, +#endif + __entry->no_vblank, __entry->plane_mask, __entry->connector_mask, __entry->encoder_mask) ); @@ -322,7 +374,9 @@ TRACE_EVENT(amdgpu_dm_atomic_state_template, __field(bool, allow_modeset) __field(bool, legacy_cursor_update) __field(bool, async_update) +#ifdef HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED __field(bool, duplicated) +#endif __field(int, num_connector) __field(int, num_private_objs) ), @@ -332,16 +386,26 @@ TRACE_EVENT(amdgpu_dm_atomic_state_template, __entry->allow_modeset = state->allow_modeset; __entry->legacy_cursor_update = state->legacy_cursor_update; __entry->async_update = state->async_update; +#ifdef HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED __entry->duplicated = state->duplicated; +#endif __entry->num_connector = state->num_connector; __entry->num_private_objs = state->num_private_objs; ), TP_printk("state=%p allow_modeset=%d legacy_cursor_update=%d " - "async_update=%d duplicated=%d num_connector=%d " + "async_update=%d " +#ifdef HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED + "duplicated=%d " +#endif + "num_connector=%d " "num_private_objs=%d", __entry->state, __entry->allow_modeset, __entry->legacy_cursor_update, - __entry->async_update, __entry->duplicated, __entry->num_connector, + __entry->async_update, +#ifdef HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED + __entry->duplicated, +#endif + __entry->num_connector, __entry->num_private_objs) ); @@ -620,6 +684,7 @@ TRACE_EVENT(amdgpu_dmub_trace_high_irq, __entry->param0, __entry->param1) ); +#ifndef HAVE_KTIME_IS_UNION TRACE_EVENT(amdgpu_refresh_rate_track, TP_PROTO(int crtc_index, ktime_t refresh_rate_ns, uint32_t refresh_rate_hz), TP_ARGS(crtc_index, refresh_rate_ns, refresh_rate_hz), @@ -638,6 +703,7 @@ TRACE_EVENT(amdgpu_refresh_rate_track, __entry->refresh_rate_hz, __entry->refresh_rate_ns) ); +#endif TRACE_EVENT(dcn_fpu, TP_PROTO(bool begin, const char *function, const int line, const int recursion_depth), diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c index 08c494a7a21ba..faf19d03312fa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c @@ -80,6 +80,7 @@ static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector) return drm_add_modes_noedid(connector, 3840, 2160); } +#ifdef HAVE_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector, struct drm_writeback_job *job) { @@ -106,7 +107,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector return r; } - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(amdkcl_ttm_resvp(&rbo->tbo), 1); if (r) { dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); goto 
error_unlock; @@ -114,6 +115,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector domain = amdgpu_display_supported_domains(adev, rbo->flags); + rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; r = amdgpu_bo_pin(rbo, domain); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) @@ -163,6 +165,7 @@ static void amdgpu_dm_wb_cleanup_job(struct drm_writeback_connector *connector, amdgpu_bo_unreserve(rbo); amdgpu_bo_unref(&rbo); } +#endif static const struct drm_encoder_helper_funcs amdgpu_dm_wb_encoder_helper_funcs = { .atomic_check = amdgpu_dm_wb_encoder_atomic_check, @@ -178,8 +181,10 @@ static const struct drm_connector_funcs amdgpu_dm_wb_connector_funcs = { static const struct drm_connector_helper_funcs amdgpu_dm_wb_conn_helper_funcs = { .get_modes = amdgpu_dm_wb_connector_get_modes, +#ifdef HAVE_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB .prepare_writeback_job = amdgpu_dm_wb_prepare_job, .cleanup_writeback_job = amdgpu_dm_wb_cleanup_job, +#endif }; int amdgpu_dm_wb_connector_init(struct amdgpu_display_manager *dm, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index e46f8ce41d871..35066ae9acbce 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -26,7 +26,24 @@ #include "dc_trace.h" +#ifdef CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT #include +#else +#if defined(CONFIG_X86) +#if defined(HAVE_ASM_FPU_API_H) +#include +#else +#include +#endif +#elif defined(CONFIG_PPC64) +#include +#include +#elif defined(CONFIG_ARM64) +#include +#elif defined(CONFIG_LOONGARCH) +#include +#endif +#endif /** * DOC: DC FPU manipulation overview @@ -79,8 +96,19 @@ void dc_fpu_begin(const char *function_name, const int line) preempt_disable(); depth = __this_cpu_inc_return(fpu_recursion_depth); if (depth == 1) { +#ifdef CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT BUG_ON(!kernel_fpu_available()); kernel_fpu_begin(); +#else +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) + kernel_fpu_begin(); +#elif defined(CONFIG_PPC64) + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) + enable_kernel_fp(); +#elif defined(CONFIG_ARM64) + kernel_neon_begin(); +#endif +#endif } TRACE_DCN_FPU(true, function_name, line, depth); @@ -102,7 +130,18 @@ void dc_fpu_end(const char *function_name, const int line) depth = __this_cpu_dec_return(fpu_recursion_depth); if (depth == 0) { +#ifdef CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT + kernel_fpu_end(); +#else +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) kernel_fpu_end(); +#elif defined(CONFIG_PPC64) + if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) + disable_kernel_fp(); +#elif defined(CONFIG_ARM64) + kernel_neon_end(); +#endif +#endif } else { WARN_ON_ONCE(depth < 0); } diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index e47e9db062f44..681799468487c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -569,7 +569,7 @@ static void calculate_bandwidth( break; } data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1)); - /*clamp the partitions to the maxium number supported by the lb*/ + /* clamp the partitions to the maximum number supported by the lb */ if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) { data->lb_partitions_max[i] = bw_int_to_fixed(10); } diff --git 
a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index adc710fe4a453..8d2cf95ae7393 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -78,10 +78,3 @@ void bios_set_scratch_critical_state( uint32_t critial_state = state ? 1 : 0; REG_UPDATE(BIOS_SCRATCH_6, S6_CRITICAL_STATE, critial_state); } - -uint32_t bios_get_vga_enabled_displays( - struct dc_bios *bios) -{ - return REG_READ(BIOS_SCRATCH_3) & 0XFFFF; -} - diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h index e1b4a40a353db..ab162f2fe5776 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h @@ -34,7 +34,6 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, bool bios_is_accelerated_mode(struct dc_bios *bios); void bios_set_scratch_acc_mode_change(struct dc_bios *bios, uint32_t state); void bios_set_scratch_critical_state(struct dc_bios *bios, bool state); -uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios); #define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type))) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 4254bdfefe38c..7d18f372ce7ab 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -227,7 +227,7 @@ static void init_transmitter_control(struct bios_parser *bp) uint8_t frev; uint8_t crev = 0; - if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev)) + if (!BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) && (bp->base.ctx->dc->ctx->dce_version <= DCN_VERSION_2_0)) BREAK_TO_DEBUGGER(); switch (crev) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index f770828df1493..0e243f4344d05 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -59,6 +59,7 @@ int clk_mgr_helper_get_active_display_cnt( display_count = 0; for (i = 0; i < context->stream_count; i++) { const struct dc_stream_state *stream = context->streams[i]; + const struct dc_stream_status *stream_status = &context->stream_status[i]; /* Don't count SubVP phantom pipes as part of active * display count @@ -66,13 +67,7 @@ int clk_mgr_helper_get_active_display_cnt( if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; - /* - * Only notify active stream or virtual stream. - * Need to notify virtual stream to work around - * headless case. HPD does not fire when system is in - * S0i2. 
- */ - if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL) + if (!stream->dpms_off || (stream_status && stream_status->plane_count)) display_count++; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index e2d906327e2ed..734c0819bb3f3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -132,6 +132,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dccg *dccg = clk_mgr_internal->dccg; struct pipe_ctx *pipe = safe_to_lower ? &context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; @@ -148,8 +150,21 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * new_pipe->stream_res.stream_enc && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + + bool has_active_hpo = false; + + if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) { + has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && + dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); + + } + + + if (!has_active_hpo && !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe) && + (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc) && !stream_changed_otg_dig_on)) { + + /* This w/a should not trigger when we have a dig active */ if (disable) { if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) @@ -257,11 +272,11 @@ static void dcn35_notify_host_router_bw(struct clk_mgr *clk_mgr_base, struct dc_ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); uint32_t host_router_bw_kbps[MAX_HOST_ROUTERS_NUM] = { 0 }; int i; - for (i = 0; i < context->stream_count; ++i) { const struct dc_stream_state *stream = context->streams[i]; const struct dc_link *link = stream->link; - uint8_t lowest_dpia_index = 0, hr_index = 0; + uint8_t lowest_dpia_index = 0; + unsigned int hr_index = 0; if (!link) continue; @@ -271,6 +286,8 @@ static void dcn35_notify_host_router_bw(struct clk_mgr *clk_mgr_base, struct dc_ continue; hr_index = (link->link_index - lowest_dpia_index) / 2; + if (hr_index >= MAX_HOST_ROUTERS_NUM) + continue; host_router_bw_kbps[hr_index] += dc_bandwidth_in_kbps_from_timing( &stream->timing, dc_link_get_highest_encoding_format(link)); } @@ -382,6 +399,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; dcn35_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); dcn35_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); @@ 
-1097,7 +1117,7 @@ void dcn35_clk_mgr_construct( clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, - DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + DC_MEM_ALLOC_TYPE_GART, sizeof(struct dcn35_watermarks), &clk_mgr->smu_wm_set.mc_address.quad_part); @@ -1109,7 +1129,7 @@ void dcn35_clk_mgr_construct( smu_dpm_clks.dpm_clks = (DpmClocks_t_dcn35 *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, - DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + DC_MEM_ALLOC_TYPE_GART, sizeof(DpmClocks_t_dcn35), &smu_dpm_clks.mc_address.quad_part); @@ -1206,7 +1226,7 @@ void dcn35_clk_mgr_construct( } if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0) - dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, + dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, smu_dpm_clks.dpm_clks); if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { @@ -1219,6 +1239,12 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; + + /* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */ + if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE && + ctx->dce_version == DCN_VERSION_3_5 && + ((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00)) + ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 01ea3a31e54da..8cfc5f4359374 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1366,9 +1366,6 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, unsigned int num_steps = 0; - if (dc->work_arounds.skip_clock_update) - return; - if (dc->debug.enable_legacy_clock_update) { dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower); return; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c8dabb081b3d9..edb70cc0ccba7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -621,8 +621,8 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, * dc_stream_configure_crc() - Configure CRC capture for the given stream. * @dc: DC Object * @stream: The stream to configure CRC on. - * @enable: Enable CRC if true, disable otherwise. * @crc_window: CRC window (x/y start/end) information + * @enable: Enable CRC if true, disable otherwise. * @continuous: Capture CRC on every frame if true. Otherwise, only capture * once. 
* @@ -1157,6 +1157,8 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte get_surface_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SWIZZLE) get_surface_tile_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); + else if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR) + get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); else { if (dc->ctx->dce_version < DCN_VERSION_2_0) color_space_to_black_color( @@ -1233,16 +1235,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) */ if (is_phantom) { if (tg->funcs->enable_crtc) { - int main_pipe_width = 0, main_pipe_height = 0; - struct dc_stream_state *old_paired_stream = dc_state_get_paired_subvp_stream(dc->current_state, old_stream); - - if (old_paired_stream) { - main_pipe_width = old_paired_stream->dst.width; - main_pipe_height = old_paired_stream->dst.height; - } - - if (dc->hwss.blank_phantom) - dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); + if (dc->hwseq->funcs.blank_pixel_data) + dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); tg->funcs->enable_crtc(tg); } } @@ -1437,6 +1431,7 @@ void dc_hardware_init(struct dc *dc) detect_edp_presence(dc); if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW) dc->hwss.init_hw(dc); + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); } void dc_init_callbacks(struct dc *dc, @@ -1767,7 +1762,7 @@ bool dc_validate_boot_timing(const struct dc *dc, if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; - if (!se->funcs->dp_get_pixel_format) + if (!se || !se->funcs->dp_get_pixel_format) return false; if (!se->funcs->dp_get_pixel_format( @@ -1876,6 +1871,41 @@ void dc_z10_save_init(struct dc *dc) dc->hwss.z10_save_init(dc); } +/* Set a pipe unlock order based on the change in DET allocation and stores it in dc scratch memory + * Prevents over allocation of DET during unlock process + * e.g. 
2 pipe config with different streams with a max of 20 DET segments + * Before: After: + * - Pipe0: 10 DET segments - Pipe0: 12 DET segments + * - Pipe1: 10 DET segments - Pipe1: 8 DET segments + * If Pipe0 gets updated first, 22 DET segments will be allocated + */ +static void determine_pipe_unlock_order(struct dc *dc, struct dc_state *context) +{ + unsigned int i = 0; + struct pipe_ctx *pipe = NULL; + struct timing_generator *tg = NULL; + + if (!dc->config.set_pipe_unlock_order) + return; + + memset(dc->scratch.pipes_to_unlock_first, 0, sizeof(dc->scratch.pipes_to_unlock_first)); + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + continue; + } + + if (resource_calculate_det_for_stream(context, pipe) < + resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i])) { + dc->scratch.pipes_to_unlock_first[i] = true; + } + } +} + /** * dc_commit_state_no_check - Apply context to the hardware * @@ -1974,6 +2004,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c context->streams[i]->update_flags.bits.dsc_changed = prev_dsc_changed; } + determine_pipe_unlock_order(dc, context); /* Program all planes within new context*/ if (dc->res_pool->funcs->prepare_mcache_programming) dc->res_pool->funcs->prepare_mcache_programming(dc, context); @@ -2156,6 +2187,14 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params context->power_source = params->power_source; res = dc_validate_with_context(dc, set, params->stream_count, context, false); + + /* + * Only update link encoder to stream assignment after bandwidth validation passed. + */ + if (res == DC_OK && dc->res_pool->funcs->link_encs_assign) + dc->res_pool->funcs->link_encs_assign( + dc, context, context->streams, context->stream_count); + if (res != DC_OK) { BREAK_TO_DEBUGGER(); goto fail; @@ -2376,7 +2415,7 @@ static bool is_surface_in_context( return false; } -static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) +static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; enum surface_update_type update_type = UPDATE_TYPE_FAST; @@ -2455,7 +2494,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa /* todo: below are HW dependent, we should add a hook to * DCE/N resource and validated there. 
*/ - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { + if (!dc->debug.skip_full_updated_if_possible) { /* swizzled mode requires RQ to be setup properly, * thus need to run DML to calculate RQ settings */ @@ -2477,41 +2516,35 @@ static enum surface_update_type get_scaling_info_update_type( if (!u->scaling_info) return UPDATE_TYPE_FAST; - if (u->scaling_info->dst_rect.width != u->surface->dst_rect.width + if (u->scaling_info->src_rect.width != u->surface->src_rect.width + || u->scaling_info->src_rect.height != u->surface->src_rect.height + || u->scaling_info->dst_rect.width != u->surface->dst_rect.width || u->scaling_info->dst_rect.height != u->surface->dst_rect.height + || u->scaling_info->clip_rect.width != u->surface->clip_rect.width + || u->scaling_info->clip_rect.height != u->surface->clip_rect.height || u->scaling_info->scaling_quality.integer_scaling != - u->surface->scaling_quality.integer_scaling - ) { + u->surface->scaling_quality.integer_scaling) { update_flags->bits.scaling_change = 1; + if (u->scaling_info->src_rect.width > u->surface->src_rect.width + || u->scaling_info->src_rect.height > u->surface->src_rect.height) + /* Making src rect bigger requires a bandwidth change */ + update_flags->bits.clock_change = 1; + if ((u->scaling_info->dst_rect.width < u->surface->dst_rect.width || u->scaling_info->dst_rect.height < u->surface->dst_rect.height) && (u->scaling_info->dst_rect.width < u->surface->src_rect.width || u->scaling_info->dst_rect.height < u->surface->src_rect.height)) /* Making dst rect smaller requires a bandwidth change */ update_flags->bits.bandwidth_change = 1; - } - if (u->scaling_info->src_rect.width != u->surface->src_rect.width - || u->scaling_info->src_rect.height != u->surface->src_rect.height) { - - update_flags->bits.scaling_change = 1; - if (u->scaling_info->src_rect.width > u->surface->src_rect.width - || u->scaling_info->src_rect.height > u->surface->src_rect.height) - /* Making src rect bigger requires a bandwidth change */ - update_flags->bits.clock_change = 1; + if (u->scaling_info->src_rect.width > dc->caps.max_optimizable_video_width && + (u->scaling_info->clip_rect.width > u->surface->clip_rect.width || + u->scaling_info->clip_rect.height > u->surface->clip_rect.height)) + /* Changing clip size of a large surface may result in MPC slice count change */ + update_flags->bits.bandwidth_change = 1; } - if (u->scaling_info->src_rect.width > dc->caps.max_optimizable_video_width && - (u->scaling_info->clip_rect.width > u->surface->clip_rect.width || - u->scaling_info->clip_rect.height > u->surface->clip_rect.height)) - /* Changing clip size of a large surface may result in MPC slice count change */ - update_flags->bits.bandwidth_change = 1; - - if (u->scaling_info->clip_rect.width != u->surface->clip_rect.width || - u->scaling_info->clip_rect.height != u->surface->clip_rect.height) - update_flags->bits.clip_size_change = 1; - if (u->scaling_info->src_rect.x != u->surface->src_rect.x || u->scaling_info->src_rect.y != u->surface->src_rect.y || u->scaling_info->clip_rect.x != u->surface->clip_rect.x @@ -2520,13 +2553,13 @@ static enum surface_update_type get_scaling_info_update_type( || u->scaling_info->dst_rect.y != u->surface->dst_rect.y) update_flags->bits.position_change = 1; + /* process every update flag before returning */ if (update_flags->bits.clock_change || update_flags->bits.bandwidth_change || update_flags->bits.scaling_change) return UPDATE_TYPE_FULL; - if (update_flags->bits.position_change || - 
update_flags->bits.clip_size_change) + if (update_flags->bits.position_change) return UPDATE_TYPE_MED; return UPDATE_TYPE_FAST; @@ -2547,7 +2580,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc, update_flags->raw = 0; // Reset all flags - type = get_plane_info_update_type(u); + type = get_plane_info_update_type(dc, u); elevate_update_type(&overall_type, type); type = get_scaling_info_update_type(dc, u); @@ -2596,6 +2629,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc, elevate_update_type(&overall_type, UPDATE_TYPE_MED); } + if (u->sdr_white_level_nits) + if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) { + update_flags->bits.sdr_white_level_nits = 1; + elevate_update_type(&overall_type, UPDATE_TYPE_FULL); + } + if (u->cm2_params) { if ((u->cm2_params->component_settings.shaper_3dlut_setting != u->surface->mcm_shaper_3dlut_setting) @@ -2631,6 +2670,29 @@ static enum surface_update_type det_surface_update(const struct dc *dc, return overall_type; } +/* May need to flip the desktop plane in cases where MPO plane receives a flip but desktop plane doesn't + * while both planes are flip_immediate + */ +static void force_immediate_gsl_plane_flip(struct dc *dc, struct dc_surface_update *updates, int surface_count) +{ + bool has_flip_immediate_plane = false; + int i; + + for (i = 0; i < surface_count; i++) { + if (updates[i].surface->flip_immediate) { + has_flip_immediate_plane = true; + break; + } + } + + if (has_flip_immediate_plane && surface_count > 1) { + for (i = 0; i < surface_count; i++) { + if (updates[i].surface->flip_immediate) + updates[i].surface->update_flags.bits.addr_update = 1; + } + } +} + static enum surface_update_type check_update_surfaces_for_stream( struct dc *dc, struct dc_surface_update *updates, @@ -2690,6 +2752,12 @@ static enum surface_update_type check_update_surfaces_for_stream( stream_update->vrr_active_variable || stream_update->vrr_active_fixed)) su_flags->bits.fams_changed = 1; + if (stream_update->scaler_sharpener_update) + su_flags->bits.scaler_sharpener = 1; + + if (stream_update->sharpening_required) + su_flags->bits.sharpening_required = 1; + if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2873,6 +2941,10 @@ static void copy_surface_update_to_plane( surface->hdr_mult = srf_update->hdr_mult; + if (srf_update->sdr_white_level_nits) + surface->sdr_white_level_nits = + srf_update->sdr_white_level_nits; + if (srf_update->blend_tf) memcpy(&surface->blend_tf, srf_update->blend_tf, sizeof(surface->blend_tf)); @@ -2998,6 +3070,7 @@ static void copy_stream_update_to_stream(struct dc *dc, stream->writeback_info[i] = update->wb_update->writeback_info[i]; } + if (update->dsc_config) { struct dc_dsc_config old_dsc_cfg = stream->timing.dsc_cfg; uint32_t old_dsc_enabled = stream->timing.flags.DSC; @@ -3022,6 +3095,10 @@ static void copy_stream_update_to_stream(struct dc *dc, update->dsc_config = NULL; } } + if (update->scaler_sharpener_update) + stream->scaler_sharpener_update = *update->scaler_sharpener_update; + if (update->sharpening_required) + stream->sharpening_required = *update->sharpening_required; } static void backup_planes_and_stream_state( @@ -3051,7 +3128,10 @@ static void restore_planes_and_stream_state( return; for (i = 0; i < status->plane_count; i++) { + /* refcount will always be valid, restore everything else */ + struct kref refcount = status->plane_states[i]->refcount; *status->plane_states[i] = scratch->plane_states[i]; + status->plane_states[i]->refcount = refcount; } 
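/*
 * [Editor's aside - minimal sketch, not part of the patch] This illustrates
 * the refcount-preserving restore used just above, with hypothetical
 * stand-in types rather than the real DC structures. Copying a backed-up
 * struct over live state wholesale would also roll back the live reference
 * count, so the count is saved first and re-applied after the copy:
 *
 *   struct obj { int refcount; int payload; };
 *
 *   static void restore(struct obj *live, const struct obj *backup)
 *   {
 *       int refcount = live->refcount;  // live bookkeeping, must survive
 *       *live = *backup;                // wholesale payload rollback
 *       live->refcount = refcount;      // re-apply, as done with kref above
 *   }
 *
 * Without the save/re-apply step, any holder that took a reference after
 * the backup was made would no longer be accounted for by the restored
 * object.
 */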
*stream = scratch->stream_state; } @@ -3138,6 +3218,11 @@ static bool update_planes_and_stream_state(struct dc *dc, context = dc->current_state; update_type = dc_check_update_surfaces_for_stream( dc, srf_updates, surface_count, stream_update, stream_status); + /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. + * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip + * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. + */ + force_immediate_gsl_plane_flip(dc, srf_updates, surface_count); if (update_type == UPDATE_TYPE_FULL) backup_planes_and_stream_state(&dc->scratch.current_state, stream); @@ -3210,8 +3295,7 @@ static bool update_planes_and_stream_state(struct dc *dc, if (update_type != UPDATE_TYPE_MED) continue; - if (surface->update_flags.bits.clip_size_change || - surface->update_flags.bits.position_change) { + if (surface->update_flags.bits.position_change) { for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; @@ -3610,6 +3694,10 @@ static void commit_planes_for_stream_fast(struct dc *dc, struct pipe_ctx *top_pipe_to_program = NULL; struct dc_stream_status *stream_status = NULL; bool should_offload_fams2_flip = false; + bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST); + + if (should_lock_all_pipes) + determine_pipe_unlock_order(dc, context); if (dc->debug.fams2_config.bits.enable && dc->debug.fams2_config.bits.enable_offload_flip && @@ -3662,13 +3750,14 @@ static void commit_planes_for_stream_fast(struct dc *dc, if (!pipe_ctx->plane_state) continue; - if (should_update_pipe_for_plane(context, pipe_ctx, plane_state)) + if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state)) continue; + pipe_ctx->plane_state->triplebuffer_flips = false; if (update_type == UPDATE_TYPE_FAST && - dc->hwss.program_triplebuffer != NULL && - !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) { - /*triple buffer for VUpdate only*/ + dc->hwss.program_triplebuffer != NULL && + !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) { + /*triple buffer for VUpdate only*/ pipe_ctx->plane_state->triplebuffer_flips = true; } } @@ -3727,6 +3816,8 @@ static void commit_planes_for_stream(struct dc *dc, bool subvp_curr_use = false; uint8_t current_stream_mask = 0; + if (should_lock_all_pipes) + determine_pipe_unlock_order(dc, context); // Once we apply the new subvp context to hardware it won't be in the // dc->current_state anymore, so we have to cache it before we apply // the new SubVP context @@ -3734,7 +3825,7 @@ static void commit_planes_for_stream(struct dc *dc, dc_exit_ips_for_hw_access(dc); dc_z10_restore(dc); - if (update_type == UPDATE_TYPE_FULL) + if (update_type == UPDATE_TYPE_FULL && dc->optimized_required) hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -3761,6 +3852,9 @@ static void commit_planes_for_stream(struct dc *dc, context_clock_trace(dc, context); } + if (update_type == UPDATE_TYPE_FULL) + hwss_wait_for_outstanding_hw_updates(dc, dc->current_state); + top_pipe_to_program = resource_get_otg_master_for_stream( &context->res_ctx, stream); @@ -3905,19 +3999,20 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; if (!pipe_ctx->plane_state) continue; - if (should_update_pipe_for_plane(context, pipe_ctx, plane_state)) + if 
(!should_update_pipe_for_plane(context, pipe_ctx, plane_state)) continue; pipe_ctx->plane_state->triplebuffer_flips = false; if (update_type == UPDATE_TYPE_FAST && - dc->hwss.program_triplebuffer != NULL && - !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) { - /*triple buffer for VUpdate only*/ - pipe_ctx->plane_state->triplebuffer_flips = true; + dc->hwss.program_triplebuffer != NULL && + !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) { + /*triple buffer for VUpdate only*/ + pipe_ctx->plane_state->triplebuffer_flips = true; } } if (update_type == UPDATE_TYPE_FULL) { /* force vsync flip when reconfiguring pipes to prevent underflow */ plane_state->flip_immediate = false; + plane_state->triplebuffer_flips = false; } } @@ -3938,7 +4033,6 @@ static void commit_planes_for_stream(struct dc *dc, continue; ASSERT(!pipe_ctx->plane_state->triplebuffer_flips); - if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { /*turn off triple buffer for full update*/ dc->hwss.program_triplebuffer( @@ -4013,7 +4107,7 @@ static void commit_planes_for_stream(struct dc *dc, /*program triple buffer after lock based on flip type*/ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { - /*only enable triplebuffer for fast_update*/ + /*only enable triplebuffer for fast_update*/ dc->hwss.program_triplebuffer( dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips); } @@ -4674,6 +4768,8 @@ static bool full_update_required(struct dc *dc, srf_updates[i].scaling_info || (srf_updates[i].hdr_mult.value && srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) || + (srf_updates[i].sdr_white_level_nits && + srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) || srf_updates[i].in_transfer_func || srf_updates[i].func_shaper || srf_updates[i].lut3d_func || @@ -4713,7 +4809,8 @@ static bool full_update_required(struct dc *dc, stream_update->func_shaper || stream_update->lut3d_func || stream_update->pending_test_pattern || - stream_update->crtc_timing_adjust)) + stream_update->crtc_timing_adjust || + stream_update->scaler_sharpener_update)) return true; if (stream) { @@ -4759,6 +4856,11 @@ static bool update_planes_and_stream_v1(struct dc *dc, update_type = dc_check_update_surfaces_for_stream( dc, srf_updates, surface_count, stream_update, stream_status); + /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. + * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip + * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. 
+ */ + force_immediate_gsl_plane_flip(dc, srf_updates, surface_count); if (update_type >= UPDATE_TYPE_FULL) { @@ -5047,11 +5149,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, return true; } +static void clear_update_flags(struct dc_surface_update *srf_updates, + int surface_count, struct dc_stream_state *stream) +{ + int i; + + if (stream) + stream->update_flags.raw = 0; + + for (i = 0; i < surface_count; i++) + if (srf_updates[i].surface) + srf_updates[i].surface->update_flags.raw = 0; +} + bool dc_update_planes_and_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, struct dc_stream_update *stream_update) { + bool ret = false; + dc_exit_ips_for_hw_access(dc); /* * update planes and stream version 3 separates FULL and FAST updates @@ -5068,10 +5185,16 @@ bool dc_update_planes_and_stream(struct dc *dc, * features as they are now transparent to the new sequence. */ if (dc->ctx->dce_version >= DCN_VERSION_4_01) - return update_planes_and_stream_v3(dc, srf_updates, + ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - return update_planes_and_stream_v2(dc, srf_updates, + else + ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); + + if (ret) + clear_update_flags(srf_updates, surface_count, stream); + + return ret; } void dc_commit_updates_for_stream(struct dc *dc, @@ -5081,6 +5204,8 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_stream_update *stream_update, struct dc_state *state) { + bool ret = false; + dc_exit_ips_for_hw_access(dc); /* TODO: Since change commit sequence can have a huge impact, * we decided to only enable it for DCN3x. However, as soon as @@ -5088,17 +5213,17 @@ void dc_commit_updates_for_stream(struct dc *dc, * the new sequence for all ASICs. 
*/ if (dc->ctx->dce_version >= DCN_VERSION_4_01) { - update_planes_and_stream_v3(dc, srf_updates, surface_count, + ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - return; - } - if (dc->ctx->dce_version >= DCN_VERSION_3_2) { - update_planes_and_stream_v2(dc, srf_updates, surface_count, + } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - return; - } - update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, - stream_update, state); + } else + ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, + stream_update, state); + + if (ret) + clear_update_flags(srf_updates, surface_count, stream); } uint8_t dc_get_current_stream_count(struct dc *dc) @@ -5161,6 +5286,8 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc_z10_restore(dc); + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); + dc->hwss.init_hw(dc); if (dc->hwss.init_sys_ctx != NULL && @@ -5172,6 +5299,8 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) default: ASSERT(dc->current_state->stream_count == 0); + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); + dc_state_destruct(dc->current_state); break; @@ -5733,6 +5862,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, return DC_OK; } +/** + * dc_process_dmub_dpia_set_tps_notification - Submits tps notification + * + * @dc: [in] dc structure + * @link_index: [in] link index + * @tps: [in] request tps + * + * Submits set_tps_notification command to dmub via inbox message + */ +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps) +{ + union dmub_rb_cmd cmd = {0}; + + cmd.set_tps_notification.header.type = DMUB_CMD__DPIA; + cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION; + cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst; + cmd.set_tps_notification.tps_notification.tps = tps; + + dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + /** * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption * @@ -5933,7 +6083,12 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state { struct dc_power_profile profile = { 0 }; - profile.power_level += !context->bw_ctx.bw.dcn.clk.p_state_change_support; + profile.power_level = !context->bw_ctx.bw.dcn.clk.p_state_change_support; + if (!context->clk_mgr || !context->clk_mgr->ctx || !context->clk_mgr->ctx->dc) + return profile; + struct dc *dc = context->clk_mgr->ctx->dc; + if (dc->res_pool->funcs->get_power_profile) + profile.power_level = dc->res_pool->funcs->get_power_profile(context); return profile; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 7ee2be8f82c46..2fdcf8d59b9f5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -497,6 +497,23 @@ void get_mclk_switch_visual_confirm_color( } } +void get_cursor_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + + if (pipe_ctx->stream && pipe_ctx->stream->cursor_position.enable) { + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + } else { + color->color_r_cr = 0; + 
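/*
 * [Editor's aside - illustrative sketch, not part of the patch] The DMUB
 * commands added in this patch (dc_process_dmub_dpia_set_tps_notification
 * above, dc_dmub_srv_notify_fw_dc_power_state later on) follow one inbox
 * pattern: zero-initialize the command union, fill in the header type and
 * sub-type, fill the payload, then hand the command to the wake-and-execute
 * helper. With hypothetical stand-in names it reduces to:
 *
 *   union cmd c = {0};                    // 1. zero the whole union
 *   c.hdr.type     = CMD__DPIA;           // 2. route to the handler
 *   c.hdr.sub_type = CMD__SET_TPS;
 *   c.payload.instance = ddc_hw_inst;     // 3. command-specific payload
 *   c.payload.tps      = tps;
 *   wake_and_execute(ctx, &c, WAIT);      // 4. send and block for ack
 *
 * Zeroing first matters because the union aliases every command layout:
 * stale bytes from a previously built command would otherwise leak into
 * fields the new command treats as reserved.
 */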
color->color_g_y = 0; + color->color_b_cb = color_value; + } +} + void set_p_state_switch_method( struct dc *dc, struct dc_state *context, @@ -1071,8 +1088,13 @@ void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_con if (!pipe_ctx->stream) continue; - if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) - pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); + /* For full update we must wait for all double buffer updates, not just DRR updates. This + * is particularly important for minimal transitions. Only check for OTG_MASTER pipes, + * as non-OTG Master pipes share the same OTG as their OTG_MASTER pipe. + */ + if (resource_is_pipe_type(pipe_ctx, OTG_MASTER) && dc->hwss.wait_for_all_pending_updates) { + dc->hwss.wait_for_all_pending_updates(pipe_ctx); + } hubp = pipe_ctx->plane_res.hubp; if (!hubp) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b38340c690c60..33125b95c3a13 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -342,11 +342,6 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, res_pool->ref_clocks.xtalin_clock_inKhz; res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; - if (dc->debug.using_dml2) - if (res_pool->hubbub && res_pool->hubbub->funcs->get_dchub_ref_freq) - res_pool->hubbub->funcs->get_dchub_ref_freq(res_pool->hubbub, - res_pool->ref_clocks.dccg_ref_clock_inKhz, - &res_pool->ref_clocks.dchub_ref_clock_inKhz); } else ASSERT_CRITICAL(false); } @@ -770,25 +765,6 @@ static inline void get_vp_scan_direction( *flip_horz_scan_dir = !*flip_horz_scan_dir; } -/* - * This is a preliminary vp size calculation to allow us to check taps support. - * The result is completely overridden afterwards.
- */ -static void calculate_viewport_size(struct pipe_ctx *pipe_ctx) -{ - struct scaler_data *data = &pipe_ctx->plane_res.scl_data; - - data->viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz, data->recout.width)); - data->viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert, data->recout.height)); - data->viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz_c, data->recout.width)); - data->viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert_c, data->recout.height)); - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || - pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { - swap(data->viewport.width, data->viewport.height); - swap(data->viewport_c.width, data->viewport_c.height); - } -} - static struct rect intersect_rec(const struct rect *r0, const struct rect *r1) { struct rect rec; @@ -1473,6 +1449,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) const struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; const struct rect odm_slice_src = resource_get_odm_slice_src_rect(pipe_ctx); + struct scaling_taps temp = {0}; bool res = false; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); @@ -1511,8 +1488,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; - spl_out->scl_data.h_active = pipe_ctx->plane_res.scl_data.h_active; - spl_out->scl_data.v_active = pipe_ctx->plane_res.scl_data.v_active; // Convert pipe_ctx to respective input params for SPL translate_SPL_in_params_from_pipe_ctx(pipe_ctx, spl_in); @@ -1532,8 +1507,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) calculate_recout(pipe_ctx); /* depends on pixel format */ calculate_scaling_ratios(pipe_ctx); - /* depends on scaling ratios and recout, does not calculate offset yet */ - calculate_viewport_size(pipe_ctx); /* * LB calculations depend on vp size, h/v_active and scaling ratios @@ -1554,6 +1527,24 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha; + // get TAP value with 100x100 dummy data for max scaling quality, override + // if a new scaling quality is required + pipe_ctx->plane_res.scl_data.viewport.width = 100; + pipe_ctx->plane_res.scl_data.viewport.height = 100; + pipe_ctx->plane_res.scl_data.viewport_c.width = 100; + pipe_ctx->plane_res.scl_data.viewport_c.height = 100; + if (pipe_ctx->plane_res.xfm != NULL) + res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( + pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + + if (pipe_ctx->plane_res.dpp != NULL) + res = pipe_ctx->plane_res.dpp->funcs->dpp_get_optimal_number_of_taps( + pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); + + temp = pipe_ctx->plane_res.scl_data.taps; + + calculate_inits_and_viewports(pipe_ctx); + if (pipe_ctx->plane_res.xfm != NULL) res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); @@ -1580,11 +1571,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) &plane_state->scaling_quality); } - /* - * Depends on recout, scaling ratios, h_active and taps - * May need to re-check lb size after this in some obscure
scenario - */ - if (res) + if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps || + pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps || + pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c || + pipe_ctx->plane_res.scl_data.taps.h_taps_c != temp.h_taps_c)) calculate_inits_and_viewports(pipe_ctx); /* @@ -4101,14 +4091,6 @@ enum dc_status dc_validate_global_state( if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) result = DC_FAIL_BANDWIDTH_VALIDATE; - /* - * Only update link encoder to stream assignment after bandwidth validation passed. - * TODO: Split out assignment and validation. - */ - if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false) - dc->res_pool->funcs->link_encs_assign( - dc, new_ctx, new_ctx->streams, new_ctx->stream_count); - return result; } @@ -5303,3 +5285,16 @@ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *o } return det_segments; } + +bool resource_is_hpo_acquired(struct dc_state *context) +{ + int i; + + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 2597e3fd562bb..e006f816ff2f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -265,6 +265,9 @@ struct dc_state *dc_state_create_copy(struct dc_state *src_state) dc_state_copy_internal(new_state, src_state); #ifdef CONFIG_DRM_AMD_DC_FP + new_state->bw_ctx.dml2 = NULL; + new_state->bw_ctx.dml2_dc_power_source = NULL; + if (src_state->bw_ctx.dml2 && !dml2_create_copy(&new_state->bw_ctx.dml2, src_state->bw_ctx.dml2)) { dc_state_release(new_state); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index be2638c763d78..aca2821d546b1 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -290,7 +290,9 @@ bool dc_stream_set_cursor_attributes( * 2. If not subvp high refresh, for single display cases, if resolution is >= 5K and refresh rate < 120hz * 3. 
If not subvp high refresh, for multi display cases, if resolution is >= 4K and refresh rate < 120hz */ - if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) { + if (dc->debug.allow_sw_cursor_fallback && + attributes->height * attributes->width * 4 > 16384 && + !stream->hw_cursor_req) { if (check_subvp_sw_cursor_fallback_req(dc, stream)) return false; } @@ -419,7 +421,6 @@ bool dc_stream_program_cursor_position( /* apply/update visual confirm */ if (dc->debug.visual_confirm == VISUAL_CONFIRM_HW_CURSOR) { /* update software state */ - uint32_t color_value = MAX_TG_COLOR_VALUE; int i; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -427,15 +428,7 @@ bool dc_stream_program_cursor_position( /* adjust visual confirm color for all pipes with current stream */ if (stream == pipe_ctx->stream) { - if (stream->cursor_position.enable) { - pipe_ctx->visual_confirm_color.color_r_cr = color_value; - pipe_ctx->visual_confirm_color.color_g_y = 0; - pipe_ctx->visual_confirm_color.color_b_cb = 0; - } else { - pipe_ctx->visual_confirm_color.color_r_cr = 0; - pipe_ctx->visual_confirm_color.color_g_y = 0; - pipe_ctx->visual_confirm_color.color_b_cb = color_value; - } + get_cursor_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color)); /* programming hardware */ if (pipe_ctx->plane_state) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6b036417a73ae..940d2fb0ee83d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.296" +#define DC_VER "3.2.304" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -227,6 +227,10 @@ struct dc_dmub_caps { uint8_t fams_ver; }; +struct dc_scl_caps { + bool sharpener_support; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -285,6 +289,7 @@ struct dc_caps { uint16_t subvp_vertical_int_margin_us; bool seamless_odm; uint32_t max_v_total; + bool vtotal_limited_by_fp2; uint32_t max_disp_clock_khz_at_vmin; uint8_t subvp_drr_vblank_start_margin_us; bool cursor_not_scaled; @@ -292,6 +297,7 @@ struct dc_caps { bool sequential_ono; /* Conservative limit for DCC cases which require ODM4:1 to support*/ uint32_t dcc_plane_width_limit; + struct dc_scl_caps scl_caps; }; struct dc_bug_wa { @@ -462,6 +468,8 @@ struct dc_config { bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; bool disable_hbr_audio_dp2; + bool consolidated_dpia_dp_lt; + bool set_pipe_unlock_order; }; enum visual_confirm { @@ -761,7 +769,9 @@ union dpia_debug_options { uint32_t extend_aux_rd_interval:1; /* bit 2 */ uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ - uint32_t reserved:27; + uint32_t disable_usb4_pm_support:1; /* bit 5 */ + uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ + uint32_t reserved:25; } bits; uint32_t raw; }; @@ -1010,6 +1020,9 @@ struct dc_debug_options { unsigned int min_prefetch_in_strobe_ns; bool disable_unbounded_requesting; bool dig_fifo_off_in_blank; + #ifndef HAVE_DRM_DP_REMOVE_PAYLOAD_PART + bool temp_mst_deallocation_sequence; + #endif bool override_dispclk_programming; bool otg_crc_db; bool disallow_dispclk_dppclk_ds; @@ -1051,9 +1064,14 @@ struct dc_debug_options { unsigned int disable_spl; unsigned int force_easf; unsigned int force_sharpness; + unsigned int force_sharpness_level; unsigned int force_lls; bool notify_dpia_hr_bw; bool
enable_ips_visual_confirm; + unsigned int sharpen_policy; + unsigned int scale_to_sharpness_policy; + bool skip_full_updated_if_possible; + unsigned int enable_oled_edp_power_up_opt; }; @@ -1246,7 +1264,6 @@ union surface_update_flags { uint32_t rotation_change:1; uint32_t swizzle_change:1; uint32_t scaling_change:1; - uint32_t clip_size_change: 1; uint32_t position_change:1; uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; @@ -1267,6 +1284,7 @@ union surface_update_flags { uint32_t tmz_changed:1; uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */ uint32_t full_update:1; + uint32_t sdr_white_level_nits:1; } bits; uint32_t raw; @@ -1347,8 +1365,10 @@ struct dc_plane_state { enum mpcc_movable_cm_location mcm_location; struct dc_csc_transform cursor_csc_color_matrix; bool adaptive_sharpness_en; - unsigned int sharpnessX1000; + int adaptive_sharpness_policy; + int sharpness_level; enum linear_light_scaling linear_light_scaling; + unsigned int sdr_white_level_nits; }; struct dc_plane_info { @@ -1452,6 +1472,7 @@ struct dc { struct dc_scratch_space current_state; struct dc_scratch_space new_state; struct dc_stream_state temp_stream; // Used so we don't need to allocate stream on the stack + bool pipes_to_unlock_first[MAX_PIPES]; /* Any of the pipes indicated here should be unlocked first */ } scratch; struct dml2_configuration_options dml2_options; @@ -1504,8 +1525,9 @@ struct dc_surface_update { * change cm2_params.component_settings: Full update * change cm2_params.cm2_luts: Fast update */ - struct dc_cm2_parameters *cm2_params; + const struct dc_cm2_parameters *cm2_params; const struct dc_csc_transform *cursor_csc_color_matrix; + unsigned int sdr_white_level_nits; }; /* @@ -1775,6 +1797,7 @@ struct dc_link { // BW ALLOCATON USB4 ONLY struct dc_dpia_bw_alloc dpia_bw_alloc_config; bool skip_implict_edp_power_control; + enum backlight_control_type backlight_control_type; }; /* Return an enumerated dc_link. 
@@ -2517,6 +2540,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc, uint8_t mst_alloc_slots, uint8_t *mst_slots_in_use); +void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps); + void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc, uint32_t hpd_int_enable); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index b1265124608be..3096f24012f85 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1294,6 +1294,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) memset(&new_signals, 0, sizeof(new_signals)); + new_signals.bits.allow_idle = 1; /* always set */ + if (dc->config.disable_ips == DMUB_IPS_ENABLE || dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) { new_signals.bits.allow_pg = 1; @@ -1389,7 +1391,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) */ dc_dmub_srv->needs_idle_wake = false; - if (prev_driver_signals.bits.allow_ips2 && + if ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) && (!dc->debug.optimize_ips_handshake || ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { DC_LOG_IPS( @@ -1450,7 +1452,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) } dc_dmub_srv_notify_idle(dc, false); - if (prev_driver_signals.bits.allow_ips1) { + if (prev_driver_signals.bits.allow_ips1 || prev_driver_signals.all == 0) { DC_LOG_IPS( "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, @@ -1476,7 +1478,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips2_exit_count); } -void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState) +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state) { struct dmub_srv *dmub; @@ -1485,12 +1487,38 @@ void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_c dmub = dc_dmub_srv->dmub; - if (powerState == DC_ACPI_CM_POWER_STATE_D0) + if (power_state == DC_ACPI_CM_POWER_STATE_D0) dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D0); else dmub_srv_set_power_state(dmub, DMUB_POWER_STATE_D3); } +void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv, + enum dc_acpi_cm_power_state power_state) +{ + union dmub_rb_cmd cmd; + + if (!dc_dmub_srv) + return; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.idle_opt_set_dc_power_state.header.type = DMUB_CMD__IDLE_OPT; + cmd.idle_opt_set_dc_power_state.header.sub_type = DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE; + cmd.idle_opt_set_dc_power_state.header.payload_bytes = + sizeof(cmd.idle_opt_set_dc_power_state) - sizeof(cmd.idle_opt_set_dc_power_state.header); + + if (power_state == DC_ACPI_CM_POWER_STATE_D0) { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D0; + } else if (power_state == DC_ACPI_CM_POWER_STATE_D3) { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_D3; + } else { + cmd.idle_opt_set_dc_power_state.data.power_state = DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN; + } + + dc_wake_and_execute_dmub_cmd(dc_dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + bool dc_dmub_srv_should_detect(struct dc_dmub_srv *dc_dmub_srv) { volatile const struct dmub_shared_state_ips_fw *ips_fw; @@ -1836,3 +1864,81 @@ void dc_dmub_srv_fams2_passthrough_flip( dm_execute_dmub_cmd_list(dc->ctx, 
num_cmds, cmds, DM_DMUB_WAIT_TYPE_WAIT); } } + +bool dc_dmub_srv_ips_residency_cntl(struct dc_dmub_srv *dc_dmub_srv, bool start_measurement) +{ + bool result; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + result = dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IPS_RESIDENCY, + start_measurement, NULL, DM_DMUB_WAIT_TYPE_WAIT); + + return result; +} + +void dc_dmub_srv_ips_query_residency_info(struct dc_dmub_srv *dc_dmub_srv, struct ips_residency_info *output) +{ + uint32_t i; + enum dmub_gpint_command command_code; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return; + + switch (output->ips_mode) { + case DMUB_IPS_MODE_IPS1_MAX: + command_code = DMUB_GPINT__GET_IPS1_HISTOGRAM_COUNTER; + break; + case DMUB_IPS_MODE_IPS2: + command_code = DMUB_GPINT__GET_IPS2_HISTOGRAM_COUNTER; + break; + case DMUB_IPS_MODE_IPS1_RCG: + command_code = DMUB_GPINT__GET_IPS1_RCG_HISTOGRAM_COUNTER; + break; + case DMUB_IPS_MODE_IPS1_ONO2_ON: + command_code = DMUB_GPINT__GET_IPS1_ONO2_ON_HISTOGRAM_COUNTER; + break; + default: + command_code = DMUB_GPINT__INVALID_COMMAND; + break; + } + + if (command_code == DMUB_GPINT__INVALID_COMMAND) + return; + + // send gpint commands and wait for ack + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT, + (uint16_t)(output->ips_mode), + &output->residency_percent, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->residency_percent = 0; + + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_ENTRY_COUNTER, + (uint16_t)(output->ips_mode), + &output->entry_counter, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->entry_counter = 0; + + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_LO, + (uint16_t)(output->ips_mode), + &output->total_active_time_us[0], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->total_active_time_us[0] = 0; + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI, + (uint16_t)(output->ips_mode), + &output->total_active_time_us[1], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->total_active_time_us[1] = 0; + + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_LO, + (uint16_t)(output->ips_mode), + &output->total_inactive_time_us[0], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->total_inactive_time_us[0] = 0; + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_HI, + (uint16_t)(output->ips_mode), + &output->total_inactive_time_us[1], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->total_inactive_time_us[1] = 0; + + // NUM_IPS_HISTOGRAM_BUCKETS = 16 + for (i = 0; i < 16; i++) + if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, command_code, i, &output->histogram[i], + DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + output->histogram[i] = 0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 580940222777e..10b48198b7a62 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -109,7 +109,29 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait); void dc_dmub_srv_apply_idle_power_optimizations(const struct dc *dc, bool allow_idle); -void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state powerState); +/** + * dc_dmub_srv_set_power_state() - Sets the power state for DMUB service. 
+ * + * Controls whether messaging the DMCUB or interfacing with it via HW register + * interaction is permissible. + * + * @dc_dmub_srv - The DC DMUB service pointer + * @power_state - the DC power state + */ +void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state); + +/** + * dc_dmub_srv_notify_fw_dc_power_state() - Notifies firmware of the DC power state. + * + * Differs from dc_dmub_srv_set_power_state in that it needs to access HW in order + * to message DMCUB of the state transition. Should come after the D0 exit and + * before D3 set power state. + * + * @dc_dmub_srv - The DC DMUB service pointer + * @power_state - the DC power state + */ +void dc_dmub_srv_notify_fw_dc_power_state(struct dc_dmub_srv *dc_dmub_srv, + enum dc_acpi_cm_power_state power_state); /** * @dc_dmub_srv_should_detect() - Checks if link detection is required. @@ -187,4 +209,43 @@ void dc_dmub_srv_fams2_passthrough_flip( struct dc_stream_state *stream, struct dc_surface_update *srf_updates, int surface_count); + +/** + * struct ips_residency_info - struct containing info from dmub_ips_residency_stats + * + * @ips_mode: The mode of IPS that the following stats pertain to + * @residency_percent: The percentage of time spent in given IPS mode in millipercent + * @entry_counter: The number of entries made into this IPS state + * @total_active_time_us: uint32_t array of length 2 representing time in the given IPS mode + * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits. + * @total_inactive_time_us: uint32_t array of length 2 representing time outside the given IPS mode + * in microseconds. Index 0 is lower 32 bits, index 1 is upper 32 bits. + * @histogram: Histogram of given IPS state durations - bucket definitions in dmub_ips.c + */ +struct ips_residency_info { + enum dmub_ips_mode ips_mode; + unsigned int residency_percent; + unsigned int entry_counter; + unsigned int total_active_time_us[2]; + unsigned int total_inactive_time_us[2]; + unsigned int histogram[16]; +}; + +/** + * bool dc_dmub_srv_ips_residency_cntl() - Controls IPS residency measurement status + * + * @dc_dmub_srv: The DC DMUB service pointer + * @start_measurement: Describes whether to start or stop measurement + * + * Return: true if GPINT was sent successfully, false otherwise + */ +bool dc_dmub_srv_ips_residency_cntl(struct dc_dmub_srv *dc_dmub_srv, bool start_measurement); + +/** + * void dc_dmub_srv_ips_query_residency_info() - Queries DMCUB for residency info + * + * @dc_dmub_srv: The DC DMUB service pointer + * @output: Output struct to copy the residency info to + */ +void dc_dmub_srv_ips_query_residency_info(struct dc_dmub_srv *dc_dmub_srv, struct ips_residency_info *output); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 519c3df78ee5b..8dd6eb044829a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -969,6 +969,14 @@ union dp_sink_video_fallback_formats { uint8_t raw; }; +union dpcd_max_uncompressed_pixel_rate_cap { + struct { + uint16_t max_uncompressed_pixel_rate_cap :15; + uint16_t valid :1; + } bits; + uint8_t raw[2]; +}; + union dp_fec_capability1 { struct { uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1; @@ -1158,6 +1166,7 @@ struct dpcd_caps { int8_t branch_dev_name[6]; int8_t branch_hw_revision; int8_t branch_fw_revision[2]; + int8_t branch_vendor_specific_data[4]; bool allow_invalid_MSA_timing_param; bool
panel_mode_edp; @@ -1170,6 +1179,7 @@ struct dpcd_caps { struct dc_lttpr_caps lttpr_caps; struct adaptive_sync_caps adaptive_sync_caps; struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info; + union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap; union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates; union dp_main_line_channel_coding_cap channel_coding_cap; @@ -1182,6 +1192,7 @@ struct dpcd_caps { struct edp_psr_info psr_info; struct replay_info pr_info; + uint16_t edp_oled_emission_rate; }; union dpcd_sink_ext_caps { @@ -1195,7 +1206,7 @@ union dpcd_sink_ext_caps { uint8_t oled : 1; uint8_t reserved_2 : 1; uint8_t miniled : 1; - uint8_t reserved : 1; + uint8_t emission_output : 1; } bits; uint8_t raw; }; @@ -1340,12 +1351,18 @@ struct dp_trace { #ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX #define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110 #endif +#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP +#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c +#endif #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #endif #ifndef DP_TUNNELING_IRQ #define DP_TUNNELING_IRQ (1 << 5) #endif +#ifndef DP_BRANCH_VENDOR_SPECIFIC_START +#define DP_BRANCH_VENDOR_SPECIFIC_START 0x50C +#endif /** USB4 DPCD BW Allocation Registers Chapter 10.7 **/ #ifndef DP_TUNNELING_CAPABILITIES #define DP_TUNNELING_CAPABILITIES 0xE000D /* 1.4a */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h index fe3078b8789ef..9014c24098178 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h @@ -59,6 +59,7 @@ struct dc_dsc_config_options { uint32_t max_target_bpp_limit_override_x16; uint32_t slice_height_granularity; uint32_t dsc_force_odm_hslice_override; + bool force_dsc_when_not_needed; }; bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, @@ -100,7 +101,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( */ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy); + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding); void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index b402be59b2c83..8d0eb9798254a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -28,8 +28,6 @@ */ #include -#include - #include "dm_services.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h index 44afcd9892248..bd37ec82b42d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_plane.h +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -26,7 +26,6 @@ #ifndef _DC_PLANE_H_ #define _DC_PLANE_H_ -#include "dc.h" #include "dc_hw_types.h" struct dc_plane_state *dc_create_plane_state(const struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 8f85a1db5eba4..24aa9df892f3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -38,30 +38,31 @@ static void populate_spltaps_from_taps(struct spl_taps *spl_scaling_quality, spl_scaling_quality->h_taps = scaling_quality->h_taps; spl_scaling_quality->v_taps_c = scaling_quality->v_taps_c; spl_scaling_quality->v_taps = scaling_quality->v_taps; + 
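/*
 * [Editor's aside - illustrative only, not part of the patch] The
 * dpcd_max_uncompressed_pixel_rate_cap union introduced above packs a
 * two-byte DPCD register (0x221C) as a 15-bit rate value plus a valid bit,
 * aliased with raw[2] so the bytes returned by the AUX read can be stored
 * directly and then decoded through the bitfield view:
 *
 *   union dpcd_max_uncompressed_pixel_rate_cap cap;
 *   cap.raw[0] = dpcd_buf[0];   // bytes as read from the DPCD register
 *   cap.raw[1] = dpcd_buf[1];
 *   if (cap.bits.valid)
 *       rate = cap.bits.max_uncompressed_pixel_rate_cap;
 *
 * The usual caveat applies: bitfield ordering is implementation-defined in
 * C, so this aliasing is only safe for the single ABI the driver targets.
 */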
spl_scaling_quality->integer_scaling = scaling_quality->integer_scaling; } static void populate_taps_from_spltaps(struct scaling_taps *scaling_quality, const struct spl_taps *spl_scaling_quality) { - scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c; - scaling_quality->h_taps = spl_scaling_quality->h_taps; - scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c; - scaling_quality->v_taps = spl_scaling_quality->v_taps; + scaling_quality->h_taps_c = spl_scaling_quality->h_taps_c + 1; + scaling_quality->h_taps = spl_scaling_quality->h_taps + 1; + scaling_quality->v_taps_c = spl_scaling_quality->v_taps_c + 1; + scaling_quality->v_taps = spl_scaling_quality->v_taps + 1; } static void populate_ratios_from_splratios(struct scaling_ratios *ratios, - const struct spl_ratios *spl_ratios) + const struct ratio *spl_ratios) { - ratios->horz = spl_ratios->horz; - ratios->vert = spl_ratios->vert; - ratios->horz_c = spl_ratios->horz_c; - ratios->vert_c = spl_ratios->vert_c; + ratios->horz = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio >> 5, 3, 19); + ratios->vert = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio >> 5, 3, 19); + ratios->horz_c = dc_fixpt_from_ux_dy(spl_ratios->h_scale_ratio_c >> 5, 3, 19); + ratios->vert_c = dc_fixpt_from_ux_dy(spl_ratios->v_scale_ratio_c >> 5, 3, 19); } static void populate_inits_from_splinits(struct scl_inits *inits, - const struct spl_inits *spl_inits) + const struct init *spl_inits) { - inits->h = spl_inits->h; - inits->v = spl_inits->v; - inits->h_c = spl_inits->h_c; - inits->v_c = spl_inits->v_c; + inits->h = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int, spl_inits->h_filter_init_frac >> 5, 0, 19); + inits->v = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int, spl_inits->v_filter_init_frac >> 5, 0, 19); + inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); + inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx @@ -139,24 +140,36 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl else if (pipe_ctx->stream->ctx->dc->debug.force_easf == 2) spl_in->disable_easf = true; /* Translate adaptive sharpening preference */ - if (pipe_ctx->stream->ctx->dc->debug.force_sharpness > 0) { - spl_in->adaptive_sharpness.enable = (pipe_ctx->stream->ctx->dc->debug.force_sharpness > 1) ? 
true : false; - if (pipe_ctx->stream->ctx->dc->debug.force_sharpness == 2) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_LOW; - else if (pipe_ctx->stream->ctx->dc->debug.force_sharpness == 3) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_MID; - else if (pipe_ctx->stream->ctx->dc->debug.force_sharpness >= 4) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_HIGH; - } else { - spl_in->adaptive_sharpness.enable = plane_state->adaptive_sharpness_en; - if (plane_state->sharpnessX1000 == 0) + unsigned int sharpness_setting = pipe_ctx->stream->ctx->dc->debug.force_sharpness; + unsigned int force_sharpness_level = pipe_ctx->stream->ctx->dc->debug.force_sharpness_level; + if (sharpness_setting == SHARPNESS_HW_OFF) + spl_in->adaptive_sharpness.enable = false; + else if (sharpness_setting == SHARPNESS_ZERO) { + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = 0; + } else if (sharpness_setting == SHARPNESS_CUSTOM) { + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = 0; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = 1750; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = 750; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = 0; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = 3500; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = 1500; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = 0; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = 2750; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = 1500; + + if (force_sharpness_level > 0) { + if (force_sharpness_level > 10) + force_sharpness_level = 10; + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = force_sharpness_level; + } else if (!plane_state->adaptive_sharpness_en) { spl_in->adaptive_sharpness.enable = false; - else if (plane_state->sharpnessX1000 < 999) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_LOW; - else if (plane_state->sharpnessX1000 < 1999) - spl_in->adaptive_sharpness.sharpness = SHARPNESS_MID; - else // Any other value is high sharpness - spl_in->adaptive_sharpness.sharpness = SHARPNESS_HIGH; + spl_in->adaptive_sharpness.sharpness_level = 0; + } else { + spl_in->adaptive_sharpness.enable = true; + spl_in->adaptive_sharpness.sharpness_level = plane_state->sharpness_level; + } } // Translate linear light scaling preference if (pipe_ctx->stream->ctx->dc->debug.force_lls > 0) @@ -171,6 +184,20 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; + + spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active; + spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active; + + spl_in->sharpen_policy = (enum sharpen_policy)plane_state->adaptive_sharpness_policy; + spl_in->debug.scale_to_sharpness_policy = + (enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy; + + /* Check if the stream is in fullscreen and if it's HDR.
+ * Use this to determine sharpness levels + */ + spl_in->is_fullscreen = pipe_ctx->stream->sharpening_required; + spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream); + spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits; } /// @brief Translate SPL output parameters to pipe context @@ -179,15 +206,15 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl void translate_SPL_out_params_to_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl_out *spl_out) { // Make scaler data recout point to spl output field recout - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->scl_data.recout); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.recout, &spl_out->dscl_prog_data->recout); // Make scaler data ratios point to spl output field ratios - populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->scl_data.ratios); + populate_ratios_from_splratios(&pipe_ctx->plane_res.scl_data.ratios, &spl_out->dscl_prog_data->ratios); // Make scaler data viewport point to spl output field viewport - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->scl_data.viewport); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport, &spl_out->dscl_prog_data->viewport); // Make scaler data viewport_c point to spl output field viewport_c - populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->scl_data.viewport_c); + populate_rect_from_splrect(&pipe_ctx->plane_res.scl_data.viewport_c, &spl_out->dscl_prog_data->viewport_c); // Make scaler data taps point to spl output field scaling taps - populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->scl_data.taps); + populate_taps_from_spltaps(&pipe_ctx->plane_res.scl_data.taps, &spl_out->dscl_prog_data->taps); // Make scaler data init point to spl output field init - populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->scl_data.inits); + populate_inits_from_splinits(&pipe_ctx->plane_res.scl_data.inits, &spl_out->dscl_prog_data->init); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h index c73d640c3632f..eaa5c5373b284 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.h @@ -6,6 +6,7 @@ #define __DC_SPL_TRANSLATE_H__ #include "dc.h" #include "resource.h" +#include "dm_helpers.h" /* Map SPL input parameters to pipe context * @pipe_ctx: pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dc_state.h b/drivers/gpu/drm/amd/display/dc/dc_state.h index caa45db502329..db1e63a7d460e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_state.h +++ b/drivers/gpu/drm/amd/display/dc/dc_state.h @@ -26,7 +26,6 @@ #ifndef _DC_STATE_H_ #define _DC_STATE_H_ -#include "dc.h" #include "inc/core_status.h" struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index de9bd72ca514d..413970588a26d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -142,6 +142,8 @@ union stream_update_flags { uint32_t mst_bw : 1; uint32_t crtc_timing_adjust : 1; uint32_t fams_changed : 1; + uint32_t scaler_sharpener : 1; + uint32_t sharpening_required : 1; } bits; uint32_t raw; @@ -308,6 +310,8 @@ struct dc_stream_state { bool is_phantom; struct luminance_data lumin_data; + bool 
scaler_sharpener_update; + bool sharpening_required; }; #define ABM_LEVEL_IMMEDIATE_DISABLE 255 @@ -353,6 +357,8 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; bool *hw_cursor_req; + bool *scaler_sharpener_update; + bool *sharpening_required; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 97279b080f3e0..c7fa6d0f8f0ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -178,6 +178,10 @@ struct dc_panel_patch { unsigned int skip_avmute; unsigned int mst_start_top_delay; unsigned int remove_sink_ext_caps; + unsigned int disable_colorimetry; + uint8_t blankstream_before_otg_off; + bool oled_optimize_display_on; + unsigned int force_mst_blocked_discovery; }; struct dc_edid_caps { @@ -921,6 +925,12 @@ struct display_endpoint_id { enum display_endpoint_type ep_type; }; +enum backlight_control_type { + BACKLIGHT_CONTROL_PWM = 0, + BACKLIGHT_CONTROL_VESA_AUX = 1, + BACKLIGHT_CONTROL_AMD_AUX = 2, +}; + #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) struct otg_phy_mux { uint8_t phy_output_num; @@ -1050,6 +1060,23 @@ union replay_error_status { unsigned char raw; }; +union replay_low_refresh_rate_enable_options { + struct { + //BIT[0-3]: Replay Low Hz Support control + unsigned int ENABLE_LOW_RR_SUPPORT :1; + unsigned int RESERVED_1_3 :3; + //BIT[4-15]: Replay Low Hz Enable Scenarios + unsigned int ENABLE_STATIC_SCREEN :1; + unsigned int ENABLE_FULL_SCREEN_VIDEO :1; + unsigned int ENABLE_GENERAL_UI :1; + unsigned int RESERVED_7_15 :9; + //BIT[16-31]: Replay Low Hz Enable Check + unsigned int ENABLE_STATIC_FLICKER_CHECK :1; + unsigned int RESERVED_17_31 :15; + } bits; + unsigned int raw; +}; + struct replay_config { /* Replay feature is supported */ bool replay_supported; @@ -1073,6 +1100,8 @@ struct replay_config { bool replay_support_fast_resync_in_ultra_sleep_mode; /* Replay error status */ union replay_error_status replay_error_status; + /* Replay Low Hz enable Options */ + union replay_low_refresh_rate_enable_options low_rr_enable_options; }; /* Replay feature flags*/ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 6ac2bd86c4dbb..160c299419b72 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -328,6 +328,17 @@ type DPSTREAMCLK1_GATE_DISABLE;\ type DPSTREAMCLK2_GATE_DISABLE;\ type DPSTREAMCLK3_GATE_DISABLE;\ + type SYMCLKA_FE_GATE_DISABLE;\ + type SYMCLKB_FE_GATE_DISABLE;\ + type SYMCLKC_FE_GATE_DISABLE;\ + type SYMCLKD_FE_GATE_DISABLE;\ + type SYMCLKE_FE_GATE_DISABLE;\ + type SYMCLKA_GATE_DISABLE;\ + type SYMCLKB_GATE_DISABLE;\ + type SYMCLKC_GATE_DISABLE;\ + type SYMCLKD_GATE_DISABLE;\ + type SYMCLKE_GATE_DISABLE;\ + #define DCCG401_REG_FIELD_LIST(type) \ type OTG0_TMDS_PIXEL_RATE_DIV;\ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 004c4fe3ddfc1..b363f5360818d 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -24,6 +24,7 @@ #include "reg_helper.h" #include "core_types.h" +#include "resource.h" #include "dcn35_dccg.h" #define TO_DCN_DCCG(dccg)\ @@ -136,7 +137,7 @@ static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) { struct dcn_dccg 
*dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable) return; switch (inst) { @@ -165,7 +166,7 @@ static void dccg35_set_symclk32_se_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se && enable) return; /* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */ @@ -204,7 +205,7 @@ static void dccg35_set_symclk32_le_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le && enable) return; switch (inst) { @@ -231,7 +232,7 @@ static void dccg35_set_physymclk_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk && enable) return; switch (inst) { @@ -262,35 +263,45 @@ static void dccg35_set_physymclk_rcg( } static void dccg35_set_symclk_fe_rcg( - struct dccg *dccg, - int inst, - bool enable) + struct dccg *dccg, + int inst, + bool enable) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable) return; switch (inst) { case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKA_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKB_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKC_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKD_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKE_FE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, - SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); @@ -307,27 +318,37 @@ static void dccg35_set_symclk_be_rcg( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); /* TBD add symclk_be in rcg control bits */ - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk_fe && enable) return; switch (inst) { case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKA_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKB_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKC_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, enable ? 
0 : 1); break; case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKD_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); break; case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + SYMCLKE_GATE_DISABLE, enable ? 0 : 1); REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); break; @@ -342,7 +363,7 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; switch (inst) { @@ -370,7 +391,7 @@ static void dccg35_set_dppclk_rcg(struct dccg *dccg, struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) return; switch (inst) { @@ -399,7 +420,7 @@ static void dccg35_set_dpstreamclk_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream && enable) return; switch (inst) { @@ -436,7 +457,7 @@ static void dccg35_set_smclk32_se_rcg( { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se && enable) return; switch (inst) { @@ -1082,7 +1103,8 @@ static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) uint32_t dispclk_rdivider_value = 0; REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &dispclk_rdivider_value); - REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value); + if (dispclk_rdivider_value != 0) + REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value); } static void dcn35_set_dppclk_enable(struct dccg *dccg, @@ -1370,10 +1392,10 @@ static void dccg35_set_dtbclk_dto( /* The recommended programming sequence to enable DTBCLK DTO to generate * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should - * be set only after DTO is enabled + * be set only after DTO is enabled. + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the + * programming is handled in program_pix_clk() regardless, so it can be removed from here. */ - REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], - PIPE_DTO_SRC_SEL[params->otg_inst], 2); } else { switch (params->otg_inst) { case 0: @@ -1390,9 +1412,12 @@ static void dccg35_set_dtbclk_dto( break; } - REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], - DTBCLK_DTO_ENABLE[params->otg_inst], 0, - PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + /** + * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the + * programming is handled in program_pix_clk() regardless, so it can be removed from here. 
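Reviewer note on the guard rewrites in this hunk: each dccg35_set_*_rcg() helper changes from "if (!bit) return;" to "if (!bit && enable) return;", so a cleared root_clock_optimization debug bit now only vetoes entering root clock gating, while ungate requests always reach the register write. A minimal sketch of the pattern (standalone C, names hypothetical rather than the driver's API):

#include <stdbool.h>
#include <stdio.h>

static bool root_clock_opt_allowed;	/* stands in for the debug bit */

static void set_rcg(bool enable, unsigned int *gate_disable_reg)
{
	if (!root_clock_opt_allowed && enable)
		return;			/* veto gating only, never ungating */

	*gate_disable_reg = enable ? 0 : 1;
}

int main(void)
{
	unsigned int reg = 0;		/* clock currently root-gated */

	root_clock_opt_allowed = false;
	set_rcg(true, &reg);		/* vetoed: the debug bit is clear */
	set_rcg(false, &reg);		/* ungate still reaches the write */
	printf("GATE_DISABLE = %u\n", reg);	/* prints 1 */
	return 0;
}

Letting the disable path through unconditionally appears to be the point: the kill switch can no longer wedge a clock that was gated while the switch was on.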
+ */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); @@ -1692,6 +1717,12 @@ static void dccg35_disable_symclk32_se( } } +static void dccg35_init_cb(struct dccg *dccg) +{ + (void)dccg; + /* Any RCG should be done when driver enter low power mode*/ +} + void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -1720,10 +1751,6 @@ void dccg35_init(struct dccg *dccg) dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false); } - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) - for (otg_inst = 0; otg_inst < 4; otg_inst++) - dccg35_set_dppclk_root_clock_gating(dccg, otg_inst, 0); - /* dccg35_enable_global_fgcg_rep( dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits @@ -1904,47 +1931,32 @@ static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, } /*get other front end connected to this backend*/ -static uint8_t dccg35_get_other_enabled_symclk_fe(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst) { uint8_t num_enabled_symclk_fe = 0; - uint32_t be_clk_en = 0, fe_clk_en[5] = {0}, be_clk_sel[5] = {0}; + uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0}; struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - switch (link_enc_inst) { - case 0: - REG_GET_3(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, &be_clk_en, - SYMCLKA_FE_EN, &fe_clk_en[0], - SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]); - break; - case 1: - REG_GET_3(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, &be_clk_en, - SYMCLKB_FE_EN, &fe_clk_en[1], - SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]); - break; - case 2: - REG_GET_3(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, &be_clk_en, - SYMCLKC_FE_EN, &fe_clk_en[2], - SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]); - break; - case 3: - REG_GET_3(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, &be_clk_en, - SYMCLKD_FE_EN, &fe_clk_en[3], - SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]); - break; - case 4: - REG_GET_3(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, &be_clk_en, - SYMCLKE_FE_EN, &fe_clk_en[4], - SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]); - break; - } - if (be_clk_en) { - /* for DPMST, this backend could be used by multiple front end. 
- only disable the backend if this stream_enc_ins is the last active stream enc connected to this back_end*/ - uint8_t i; - for (i = 0; i != link_enc_inst && i < ARRAY_SIZE(fe_clk_en); i++) { - if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst) - num_enabled_symclk_fe++; - } + REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0], + SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]); + + REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1], + SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]); + + REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2], + SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]); + + REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3], + SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]); + + REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4], + SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]); + + uint8_t i; + + for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) { + if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst) + num_enabled_symclk_fe++; } return num_enabled_symclk_fe; } @@ -1992,9 +2004,9 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst break; } - /*check other enabled symclk fe */ - num_enabled_symclk_fe = dccg35_get_other_enabled_symclk_fe(dccg, stream_enc_inst, link_enc_inst); - /*only turn off backend clk if other front end attachecd to this backend are all off, + /*check other enabled symclk fe connected to this be */ + num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst); + /*only turn off backend clk if other front end attached to this backend are all off, for mst, only turn off the backend if this is the last front end*/ if (num_enabled_symclk_fe == 0) { switch (link_enc_inst) { @@ -2042,8 +2054,6 @@ static void dccg35_set_dpstreamclk_cb( enum dtbclk_source dtb_clk_src; enum dp_stream_clk_source dp_stream_clk_src; - ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); - switch (src) { case REFCLK: dtb_clk_src = DTBCLK_REFCLK; @@ -2098,6 +2108,13 @@ static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + if (dccg->dpp_clock_gated[dpp_inst]) { + /* + * Do not update the DPPCLK DTO if the clock is stopped. 
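Reviewer note: the rewritten helper above stops special-casing the caller's own front end and no longer reads the back-end enable bit; it reads all five SYMCLK*_CLOCK_ENABLE registers and counts every enabled front end whose source select points at the given back end. The caller then gates the back-end clock only when that count is zero, which is the DP MST requirement that only the last active stream on a link may turn the shared backend off. A toy version of the count, with hypothetical names and plain arrays standing in for the registers:

#include <stdio.h>

#define NUM_FE 5

static unsigned int count_fe_on_be(const unsigned int fe_en[NUM_FE],
				   const unsigned int be_sel[NUM_FE],
				   unsigned int link_enc_inst)
{
	unsigned int n = 0;

	for (unsigned int i = 0; i < NUM_FE; i++)
		if (fe_en[i] && be_sel[i] == link_enc_inst)
			n++;	/* enabled FE sourced from this BE */
	return n;
}

int main(void)
{
	unsigned int fe_en[NUM_FE]  = { 1, 1, 0, 0, 0 };
	unsigned int be_sel[NUM_FE] = { 2, 2, 0, 0, 0 };

	/* two MST streams still ride link encoder 2: keep its clock on */
	printf("%u\n", count_fe_on_be(fe_en, be_sel, 2));
	return 0;
}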
+ */ + return; + } + if (dccg->ref_dppclk && req_dppclk) { int ref_dppclk = dccg->ref_dppclk; int modulo, phase; @@ -2125,19 +2142,20 @@ static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, } static void dccg35_dpp_root_clock_control_cb( - struct dccg *dccg, - unsigned int dpp_inst, - bool power_on) + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) { + if (dccg->dpp_clock_gated[dpp_inst] == power_on) + return; /* power_on set indicates we need to ungate * Currently called from optimize_bandwidth and prepare_bandwidth calls * Since clock source is not passed restore to refclock on ungate * Redundant as gating when enabled is acheived through update_dpp_dto */ - if (power_on) - dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); - else - dccg35_disable_dpp_clk_new(dccg, dpp_inst); + dccg35_set_dppclk_rcg(dccg, dpp_inst, !power_on); + + dccg->dpp_clock_gated[dpp_inst] = !power_on; } static void dccg35_enable_symclk32_se_cb( @@ -2317,11 +2335,19 @@ static void dccg35_disable_symclk_se_cb( /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ } +void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating) +{ + + if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { + dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); + } +} + static const struct dccg_funcs dccg35_funcs_new = { .update_dpp_dto = dccg35_update_dpp_dto_cb, .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, - .dccg_init = dccg35_init, + .dccg_init = dccg35_init_cb, .set_dpstreamclk = dccg35_set_dpstreamclk_cb, .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, .enable_symclk32_se = dccg35_enable_symclk32_se_cb, @@ -2377,7 +2403,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, - + .dccg_root_gate_disable_control = dccg35_root_gate_disable_control, }; struct dccg *dccg35_create( @@ -2396,11 +2422,11 @@ struct dccg *dccg35_create( (void)&dccg35_disable_symclk_be_new; (void)&dccg35_set_symclk32_le_root_clock_gating; (void)&dccg35_set_smclk32_se_rcg; - (void)&dccg35_funcs; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; - base->funcs = &dccg35_funcs_new; + base->funcs = &dccg35_funcs; dccg_dcn->regs = regs; dccg_dcn->dccg_shift = dccg_shift; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h index 1586a45ca3bd4..51f98c5c51c41 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h @@ -241,6 +241,7 @@ struct dccg *dccg35_create( void dccg35_init(struct dccg *dccg); void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value); +void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating); #endif //__DCN35_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 0b889004509ad..d3e46c3cfa575 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -580,9 +580,6 @@ static void dccg401_set_dpstreamclk( int otg_inst, int dp_hpo_inst) { - /* set the dtbclk_p source */ - 
dccg401_set_dtbclk_p_src(dccg, src, otg_inst); - /* enabled to select one of the DTBCLKs for pipe */ if (src == REFCLK) dccg401_disable_dpstreamclk(dccg, dp_hpo_inst); @@ -805,33 +802,6 @@ static void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - switch (link_enc_inst) { - case 0: - REG_UPDATE(SYMCLKA_CLOCK_ENABLE, - SYMCLKA_CLOCK_ENABLE, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKA_ROOT_GATE_DISABLE, 1); - break; - case 1: - REG_UPDATE(SYMCLKB_CLOCK_ENABLE, - SYMCLKB_CLOCK_ENABLE, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKB_ROOT_GATE_DISABLE, 1); - break; - case 2: - REG_UPDATE(SYMCLKC_CLOCK_ENABLE, - SYMCLKC_CLOCK_ENABLE, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKC_ROOT_GATE_DISABLE, 1); - break; - case 3: - REG_UPDATE(SYMCLKD_CLOCK_ENABLE, - SYMCLKD_CLOCK_ENABLE, 1); - if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, SYMCLKD_ROOT_GATE_DISABLE, 1); - break; - } - switch (stream_enc_inst) { case 0: REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, @@ -864,37 +834,8 @@ static void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst } } -/*get other front end connected to this backend*/ -static uint8_t dccg401_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst) -{ - uint8_t num_enabled_symclk_fe = 0; - uint32_t fe_clk_en[4] = {0}, be_clk_sel[4] = {0}; - struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - uint8_t i; - - REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0], - SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]); - - REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1], - SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]); - - REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2], - SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]); - - REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3], - SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]); - - for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) { - if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst) - num_enabled_symclk_fe++; - } - - return num_enabled_symclk_fe; -} - static void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) { - uint8_t num_enabled_symclk_fe = 0; struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); switch (stream_enc_inst) { @@ -919,31 +860,6 @@ static void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_ins SYMCLKD_FE_SRC_SEL, 0); break; } - - /*check other enabled symclk fe connected to this be */ - num_enabled_symclk_fe = dccg401_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst); - /*only turn off backend clk if other front ends attached to this backend are all off, - for mst, only turn off the backend if this is the last front end*/ - if (num_enabled_symclk_fe == 0) { - switch (link_enc_inst) { - case 0: - REG_UPDATE(SYMCLKA_CLOCK_ENABLE, - SYMCLKA_CLOCK_ENABLE, 0); - break; - case 1: - REG_UPDATE(SYMCLKB_CLOCK_ENABLE, - SYMCLKB_CLOCK_ENABLE, 0); - break; - case 2: - REG_UPDATE(SYMCLKC_CLOCK_ENABLE, - SYMCLKC_CLOCK_ENABLE, 0); - break; - case 3: - REG_UPDATE(SYMCLKD_CLOCK_ENABLE, - SYMCLKD_CLOCK_ENABLE, 0); - break; - } - } } static const struct dccg_funcs dccg401_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index b700608e42403..d6e7aaeb909ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -998,6 +998,12 @@ static bool dcn31_program_pix_clk( REG_UPDATE_2(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1, PIPE0_DTO_SRC_SEL, 2); +#if defined(CONFIG_DRM_AMD_DC_HDMI2_1) + else if (dc_is_hdmi_frl_signal(pix_clk_params->signal_type) || encoding == DP_128b_132b_ENCODING) + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 0, + PIPE0_DTO_SRC_SEL, 2); +#endif else REG_UPDATE_2(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index cae18f8c1c9a0..f7b4867f0b330 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -380,7 +380,6 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; copy_settings_data->panel_inst = panel_inst; copy_settings_data->dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); - /** * WA for PSRSU+DSC on specific TCON, if DSC is enabled, force PSRSU as ffu mode(full frame update) * Note that PSRSU+DSC is still under development. diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 14f9359616728..c31e4f26a305b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -12,6 +12,8 @@ #define MAX_PIPES 6 +#define GPINT_RETRY_NUM 20 + static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3}; static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5}; @@ -222,6 +224,7 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, uint32_t *residency, const bool is_start, enum pr_residency_mode mode) { uint16_t param = (uint16_t)(panel_inst << 8); + uint32_t i = 0; switch (mode) { case PR_RESIDENCY_MODE_PHY: @@ -249,10 +252,17 @@ static void dmub_replay_residency(struct dmub_replay *dmub, uint8_t panel_inst, if (is_start) param |= REPLAY_RESIDENCY_ENABLE; - // Send gpint command and wait for ack - if (!dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param, - residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - *residency = 0; + for (i = 0; i < GPINT_RETRY_NUM; i++) { + // Send gpint command and wait for ack + if (dc_wake_and_execute_gpint(dmub->ctx, DMUB_GPINT__REPLAY_RESIDENCY, param, + residency, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + return; + + udelay(100); + } + + // it means gpint retry many times + *residency = 0; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index eaed5d1c398aa..dcd2cdfe91eb6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -365,23 +365,18 @@ bool cm_helper_translate_curve_to_hw_format(struct dc_context *ctx, region_start = -MAX_LOW_POINT; region_end = NUMBER_REGIONS - MAX_LOW_POINT; } else { - /* 11 segments - * segment is from 2^-10 to 2^1 + /* 13 segments + * segment is from 2^-12 to 2^0 * There are less than 256 points, for optimization */ - seg_distr[0] = 3; - seg_distr[1] = 4; - seg_distr[2] = 4; - seg_distr[3] = 4; - seg_distr[4] = 4; - seg_distr[5] = 4; - seg_distr[6] = 4; - seg_distr[7] = 4; - seg_distr[8] = 4; - seg_distr[9] = 4; - seg_distr[10] = 1; - 
- region_start = -10; + const uint8_t SEG_COUNT = 12; + + for (i = 0; i < SEG_COUNT; i++) + seg_distr[i] = 4; + + seg_distr[SEG_COUNT] = 1; + + region_start = -SEG_COUNT; region_end = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index f31f0e3abfc0f..1e1038fb04e8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -140,23 +140,18 @@ bool cm3_helper_translate_curve_to_hw_format( region_start = -MAX_LOW_POINT; region_end = NUMBER_REGIONS - MAX_LOW_POINT; } else { - /* 11 segments - * segment is from 2^-10 to 2^0 + /* 13 segments + * segment is from 2^-12 to 2^0 * There are less than 256 points, for optimization */ - seg_distr[0] = 3; - seg_distr[1] = 4; - seg_distr[2] = 4; - seg_distr[3] = 4; - seg_distr[4] = 4; - seg_distr[5] = 4; - seg_distr[6] = 4; - seg_distr[7] = 4; - seg_distr[8] = 4; - seg_distr[9] = 4; - seg_distr[10] = 1; - - region_start = -10; + const uint8_t SEG_COUNT = 12; + + for (i = 0; i < SEG_COUNT; i++) + seg_distr[i] = 4; + + seg_distr[SEG_COUNT] = 1; + + region_start = -SEG_COUNT; region_end = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c index 51a57dae18114..182437fd0e147 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c @@ -194,7 +194,7 @@ bool enc2_fec_is_active(struct link_encoder *enc) return (active != 0); } - + /* this function reads dsc related register fields to be logged later in dcn10_log_hw_state * into a dcn_dsc_state struct. */ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c index 0b47aeb60e795..1953c56367d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c @@ -207,7 +207,6 @@ static void enc2_stream_encoder_stop_hdmi_info_packets( HDMI_GENERIC7_LINE, 0); } - /* Update GSP7 SDP 128 byte long */ static void enc2_update_gsp7_128_info_packet( struct dcn10_stream_encoder *enc1, diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c index 5b343f745cf33..1153caa60d5b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c @@ -83,6 +83,15 @@ void enc314_disable_fifo(struct stream_encoder *enc) REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0); } +static bool enc314_is_fifo_enabled(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t reset_val; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); + return (reset_val != 0); +} + void enc314_dp_set_odm_combine( struct stream_encoder *enc, bool odm_combine) @@ -457,9 +466,7 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = { .set_avmute = enc1_stream_encoder_set_avmute, .dig_connect_to_otg = enc1_dig_connect_to_otg, .dig_source_otg = enc1_dig_source_otg, - .dp_get_pixel_format = enc1_stream_encoder_dp_get_pixel_format, - .enc_read_state = enc314_read_state, .dp_set_dsc_config = enc314_dp_set_dsc_config, .dp_set_dsc_pps_info_packet = enc3_dp_set_dsc_pps_info_packet, @@ -468,6 +475,7 @@ static const struct 
stream_encoder_funcs dcn314_str_enc_funcs = { .enable_fifo = enc314_enable_fifo, .disable_fifo = enc314_disable_fifo, + .is_fifo_enabled = enc314_is_fifo_enabled, .set_input_mode = enc314_set_dig_input_mode, }; diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 2e4a46f1b499d..69d846ccbb2a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -158,6 +158,7 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable ); + bool dm_helpers_is_dp_sink_present( struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 46f9c05de16e8..6ff29ecf047ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,9 +24,47 @@ # Makefile for the 'utils' sub-component of DAL. # It provides the general basic services required by other DAL # subcomponents. - +# +ifdef CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT dml_ccflags := $(CC_FLAGS_FPU) dml_rcflags := $(CC_FLAGS_NO_FPU) +else +ifdef CONFIG_X86 +dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float +dml_ccflags := $(dml_ccflags-y) -msse +endif + +ifdef CONFIG_PPC64 +dml_ccflags := -mhard-float -maltivec +endif + +ifdef CONFIG_ARM64 +dml_rcflags := -mgeneral-regs-only +endif + +ifdef CONFIG_LOONGARCH +dml_ccflags := -mfpu=64 +dml_rcflags := -msoft-float +endif + +include $(src)/../dkms/Makefile.compiler + +ifneq ($(call gcc-min-version, 70100),y) +IS_OLD_GCC = 1 +endif + +ifdef CONFIG_X86 +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml_ccflags += -mpreferred-stack-boundary=4 +else +dml_ccflags += -msse2 +endif +endif + +endif #CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c index 565f3c4924770..0c8c4a080c50e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c @@ -785,12 +785,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBW = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 9d6675ecc5f11..c935903b68e10 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -845,12 +845,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBW = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index e7019c95ba79e..390c1a77fda6a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; @@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. // calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index ae52510417280..843d6004258ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib, if (swath_height_c > 0) log2_swath_height_c = dml_log2(swath_height_c); - - if (req128_c && log2_swath_height_c > 0) - log2_swath_height_c -= 1; } rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l; @@ -446,8 +443,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -494,8 +489,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. 
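Reviewer note on the rq_dlg deletions repeated above: the req128_c swath-height adjustment and the zero stores to log2_blk_height, log2_blk_width, log2_meta_row_height and meta_row_width_ub were all overwritten before first use, so removing them is behavior-preserving. The retained math runs in log2 space, per the nearby comment that "+" in log is multiply: products of sizes become sums of logs and divides become subtracts. A standalone rerun of the retained meta-request-width line, with values chosen only for illustration:

#include <stdio.h>

/* Mirrors log2_meta_req_width = log2_meta_req_bytes + 8
 *	- log2_bytes_per_element - log2_meta_req_height;
 * the + 8 term is a multiply by 256 in linear space.
 */
static unsigned int meta_req_width_from_logs(unsigned int log2_meta_req_bytes,
					     unsigned int log2_bytes_per_element,
					     unsigned int log2_meta_req_height)
{
	unsigned int log2_meta_req_width = log2_meta_req_bytes + 8
		- log2_bytes_per_element - log2_meta_req_height;

	return 1u << log2_meta_req_width;
}

int main(void)
{
	/* 64-byte request, 4 bytes/element, request height 8 => width 512 */
	printf("%u\n", meta_req_width_from_logs(6, 2, 3));
	return 0;
}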
// calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index eb3ed965e48b7..cd8cca6514196 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -1049,12 +1049,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 9e1c18b90805d..5718000627b08 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -435,8 +435,6 @@ static void get_meta_and_pte_attr( blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -485,8 +483,6 @@ static void get_meta_and_pte_attr( - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. // calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 1c10ba4dcddea..cee1b351e1058 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -1280,12 +1280,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; @@ -1775,15 +1772,6 @@ static unsigned int CalculateVMAndRowBytes( *PixelPTEReqWidth = 32768.0 / BytePerPixel; *PTERequestSize = 64; FractionOfPTEReturnDrop = 0; - } else if (MacroTileSizeBytes == 4096) { - PixelPTEReqHeightPTEs = 1; - *PixelPTEReqHeight = MacroTileHeight; - *PixelPTEReqWidth = 8 * *MacroTileWidth; - *PTERequestSize = 64; - if (ScanDirection != dm_vert) - FractionOfPTEReturnDrop = 0; - else - FractionOfPTEReturnDrop = 7.0 / 8; } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { PixelPTEReqHeightPTEs = 16; *PixelPTEReqHeight = 16 * BlockHeight256Bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index b28fcc8608ff8..76d3bb3c91550 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -392,8 +392,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, blk_bytes = surf_linear ? 
256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double)blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -464,8 +462,6 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. // calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 0b132ce1d2cdc..f567a9023682d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -1444,12 +1444,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; @@ -1924,15 +1921,6 @@ static unsigned int CalculateVMAndRowBytes( *PixelPTEReqWidth = 32768.0 / BytePerPixel; *PTERequestSize = 64; FractionOfPTEReturnDrop = 0; - } else if (MacroTileSizeBytes == 4096) { - PixelPTEReqHeightPTEs = 1; - *PixelPTEReqHeight = MacroTileHeight; - *PixelPTEReqWidth = 8 * *MacroTileWidth; - *PTERequestSize = 64; - if (ScanDirection != dm_vert) - FractionOfPTEReturnDrop = 0; - else - FractionOfPTEReturnDrop = 7.0 / 8; } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { PixelPTEReqHeightPTEs = 16; *PixelPTEReqHeight = 16 * BlockHeight256Bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index b57b095cd4a81..c46bda2141acd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -413,8 +413,6 @@ static void get_meta_and_pte_attr( log2_blk256_height = dml_log2((double) blk256_height); blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -481,8 +479,6 @@ static void get_meta_and_pte_attr( log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. 
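Reviewer note: the same MyError cleanup recurs in every DML generation touched here. The dropped lines assign to plain locals (TimeForFetchingMetaPTE, TimeForFetchingRowInVBlank, LinesToRequestPrefetchPixelData), which die at return, while the kept lines store through output pointers that the caller observes. A compressed sketch of the distinction, hypothetical names:

#include <stdbool.h>

static bool calc_prefetch(double *prefetch_bw, double *dst_lines)
{
	double time_for_fetching_meta_pte = 12.0;	/* local scratch */
	bool my_error = time_for_fetching_meta_pte > 10.0;

	if (my_error) {
		*prefetch_bw = 0;	/* caller-visible: kept */
		*dst_lines = 0;		/* caller-visible: kept */
		/* time_for_fetching_meta_pte = 0;  dead store: removed */
		return false;
	}
	*prefetch_bw = 1.0 / time_for_fetching_meta_pte;
	*dst_lines = time_for_fetching_meta_pte;
	return true;
}

int main(void)
{
	double bw, lines;

	return calc_prefetch(&bw, &lines) ? 0 : 1;
}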
// calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index debfa31583a69..5865e8fa2d8e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -1461,12 +1461,9 @@ static bool CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c index 61b3bebf24c96..b7d2a0caec11b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c @@ -501,8 +501,6 @@ static void get_meta_and_pte_attr( log2_blk256_height = dml_log2((double) blk256_height); blk_bytes = surf_linear ? 256 : get_blk_size_bytes((enum source_macro_tile_size) macro_tile_size); log2_blk_bytes = dml_log2((double) blk_bytes); - log2_blk_height = 0; - log2_blk_width = 0; // remember log rule // "+" in log is multiply @@ -569,8 +567,6 @@ static void get_meta_and_pte_attr( log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; meta_req_height = 1 << log2_meta_req_height; - log2_meta_row_height = 0; - meta_row_width_ub = 0; // the dimensions of a meta row are meta_row_width x meta_row_height in elements. 
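Reviewer note, circling back to the dmub_replay.c hunk earlier in the series: a residency query is no longer a single dc_wake_and_execute_gpint() attempt; it retries up to GPINT_RETRY_NUM (20) times with a 100 us pause and reports 0 only when every attempt fails to ack. A toy shape of that loop, with a stubbed transport standing in for the real GPINT call:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GPINT_RETRY_NUM 20

static bool send_gpint(uint32_t param, uint32_t *reply)
{
	(void)param;
	*reply = 42;		/* stand-in for a firmware reply */
	return true;		/* pretend the ack arrived */
}

static void read_residency(uint32_t param, uint32_t *residency)
{
	for (unsigned int i = 0; i < GPINT_RETRY_NUM; i++) {
		if (send_gpint(param, residency))
			return;	/* acked: residency holds the reply */
		/* udelay(100) in the real driver */
	}
	*residency = 0;		/* every retry timed out */
}

int main(void)
{
	uint32_t r;

	read_residency(0x0100, &r);
	printf("%u\n", r);
	return 0;
}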
// calculate upper bound of the meta_row_width diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index d92fb428ee96f..86ac7d59fd325 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -4097,12 +4097,9 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index a201dbb743d79..d9e63c4fdd95c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -204,8 +204,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = { .num_states = 8, .sr_exit_time_us = 28.0, .sr_enter_plus_exit_time_us = 30.0, - .sr_exit_z8_time_us = 250.0, - .sr_enter_plus_exit_z8_time_us = 350.0, + .sr_exit_z8_time_us = 263.0, + .sr_enter_plus_exit_z8_time_us = 363.0, .fclk_change_latency_us = 24.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index d8bfc85e5dcd0..88dc2b97e7bf5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -559,12 +559,11 @@ static void get_surf_rq_param( const struct _vcs_dpi_display_pipe_source_params_st *pipe_src_param, bool is_chroma) { - bool mode_422 = 0; unsigned int vp_width = 0; unsigned int vp_height = 0; unsigned int data_pitch = 0; unsigned int meta_pitch = 0; - unsigned int ppe = mode_422 ? 2 : 1; + unsigned int ppe = 1; bool surf_linear; bool surf_vert; unsigned int bytes_per_element; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c index ef75eb7d5adc3..bf01d8a9e538b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c @@ -257,4 +257,3 @@ void _do_calc_rc_params(struct rc_params *rc, rc->rc_buf_thresh[12] = 8000; rc->rc_buf_thresh[13] = 8064; } - diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index cf979ab172bdc..fe66c2ee676aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -24,8 +24,46 @@ # # Makefile for dml2. +ifdef CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT dml2_ccflags := $(CC_FLAGS_FPU) dml2_rcflags := $(CC_FLAGS_NO_FPU) +else +ifdef CONFIG_X86 +dml2_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float +dml2_ccflags := $(dml2_ccflags-y) -msse +endif + +ifdef CONFIG_PPC64 +dml2_ccflags := -mhard-float -maltivec +endif + +ifdef CONFIG_ARM64 +dml2_rcflags := -mgeneral-regs-only +endif + +ifdef CONFIG_LOONGARCH +dml2_ccflags := -mfpu=64 +dml2_rcflags := -msoft-float +endif + +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif + +ifdef CONFIG_X86 +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. 
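Reviewer note on this Makefile block and its dml/ twin above: DML does double-precision math in kernel context, so when CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT is absent the hard-float code generation flags are picked per architecture (x86 -msse/-msse2, PPC64 -mhard-float -maltivec, LoongArch -mfpu=64, and general registers only for the no-FPU objects on ARM64). Such code may only execute while the kernel FPU context is claimed; a kernel-side sketch under that assumption, not buildable in userspace (kernel_fpu_begin()/kernel_fpu_end() are the x86 kernel API, which DC wraps behind DC_FP_START()/DC_FP_END()):

#include <asm/fpu/api.h>	/* x86: kernel_fpu_begin()/kernel_fpu_end() */

/* FP registers may only be touched between begin/end, which save and
 * restore the user task's FPU state; every object given dml_ccflags or
 * dml2_ccflags must be entered this way.
 */
static double dml_scaled_latency(double latency_us, double downspread_pct)
{
	return latency_us * (1.0 + downspread_pct / 100.0);
}

void example_caller(void)
{
	double v;

	kernel_fpu_begin();
	v = dml_scaled_latency(28.0, 0.5);
	kernel_fpu_end();
	(void)v;
}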
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml2_ccflags += -mpreferred-stack-boundary=4 +else +dml2_ccflags += -msse2 +endif +endif + +endif #CONFIG_ARCH_HAS_KERNEL_FPU_SUPPORT ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) @@ -79,7 +117,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) @@ -101,7 +138,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o : CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) @@ -122,7 +158,6 @@ DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_core/dml2_core_shared.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 547dfcc80fde4..be87dc0f07799 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -1222,6 +1222,7 @@ static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st * s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal); + s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); @@ -6433,7 +6434,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) /* Output */ &mode_lib->ms.UrgentBurstFactorCursorPre[k], &mode_lib->ms.UrgentBurstFactorLumaPre[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], &mode_lib->ms.NotUrgentLatencyHidingPre[k]); mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * @@ -8926,7 +8927,7 @@ void 
dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc // The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature - // if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter) + // if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter) s->iteration = 0; s->MaxTotalRDBandwidth = 0; s->AllPrefetchModeTested = false; @@ -9189,6 +9190,8 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc &locals->FractionOfUrgentBandwidth, &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport + + if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) { dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); @@ -9203,6 +9206,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc } } + if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) { locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( mode_lib->ms.num_active_planes, @@ -9977,7 +9981,7 @@ void dml_core_get_row_heights( dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes); #endif - // just suppluy with enough parameters to calculate meta and dte + // just supply with enough parameters to calculate meta and dte CalculateVMAndRowBytes( 0, // dml_bool_t ViewportStationary, 1, // dml_bool_t DCCEnable, @@ -10110,7 +10114,7 @@ dml_bool_t dml_mode_support( /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values. /// @param state_idx Power state idx chosen -/// @param display_cfg Display Congiuration +/// @param display_cfg Display Configuration /// @param call_standalone Calling mode_programming without calling mode support. 
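Reviewer note on a numeric detail from this hunk: dst_y_prefetch_equ is now clamped to 63.75 because DST_Y_PREFETCH is programmed as U6.2 fixed point, 6 integer plus 2 fractional bits, whose largest encodable value is (2^8 - 1) / 2^2 = 63.75. A standalone illustration of that encoding:

#include <stdio.h>

/* U6.2: 8 bits total, 2 fractional. Max value 255 / 4 = 63.75, exactly
 * the clamp applied to dst_y_prefetch_equ above.
 */
static unsigned int to_u6_2(double v)
{
	if (v > 63.75)
		v = 63.75;			/* saturate at the register limit */
	return (unsigned int)(v * 4.0 + 0.5);	/* scale by 2^2 and round */
}

int main(void)
{
	printf("%u\n", to_u6_2(63.75));	/* 255: all eight bits set */
	printf("%u\n", to_u6_2(70.0));	/* also 255, after saturation */
	printf("%u\n", to_u6_2(1.25));	/* 5 */
	return 0;
}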
Some of the "support" struct member will be pre-calculated before doing mode programming /// TODO: Add clk_cfg input, could be useful for standalone mode dml_bool_t dml_mode_programming( diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 710a25dcfef0f..f66493528f420 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -339,11 +339,22 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in // } } +static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) +{ + unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total; + + if (stream->ctx->dc->caps.vtotal_limited_by_fp2) { + max_hw_v_total -= stream->timing.v_front_porch + 1; + } + + return max_hw_v_total; +} + static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, struct dc_stream_state *stream, struct dml2_context *dml_ctx) { - unsigned int hblank_start, vblank_start; + unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; @@ -371,11 +382,23 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf - stream->timing.v_border_top - stream->timing.v_border_bottom; timing->drr_config.enabled = stream->ignore_msa_timing_param; - timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; timing->drr_config.drr_active_variable = stream->vrr_active_variable; timing->drr_config.drr_active_fixed = stream->vrr_active_fixed; timing->drr_config.disallowed = !stream->allow_freesync; + /* limit min refresh rate to DC cap */ + min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; + if (stream->ctx->dc->caps.max_v_total != 0) { + min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL), + (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); + } + + if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) { + timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; + } else { + timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz; + } + if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && stream->ctx->dc->config.enable_fpo_flicker_detection == 1) timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false); @@ -777,6 +800,14 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm * certain cases. Hence do corrective active and disable scaling. */ plane->composition.scaler_info.enabled = false; + } else if ((plane_state->ctx->dc->config.use_spl == true) && + (plane->composition.scaler_info.enabled == false)) { + /* To enable sharpener for 1:1, scaler must be enabled. 
If use_spl is set, then + * allow case where ratio is 1 but taps > 1 + */ + if ((scaler_data->taps.h_taps > 1) || (scaler_data->taps.v_taps > 1) || + (scaler_data->taps.h_taps_c > 1) || (scaler_data->taps.v_taps_c > 1)) + plane->composition.scaler_info.enabled = true; } /* always_scale is only used for debug purposes not used in production but has to be @@ -850,7 +881,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->immediate_flip = plane_state->flip_immediate; - plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable; + plane->composition.rect_out_height_spans_vactive = + plane_state->dst_rect.height >= stream->src.height && + stream->dst.height >= stream->timing.v_addressable; } //TODO : Could be possibly moved to a common helper layer. @@ -1026,6 +1059,7 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; } void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index d35dd507cb9f8..bbc28b9a15a36 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -13,11 +13,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { - *dml_ctx = (struct dml2_context *)kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); if (!(*dml_ctx)) return false; - (*dml_ctx)->v21.dml_init.dml2_instance = (struct dml2_instance *)kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); if (!((*dml_ctx)->v21.dml_init.dml2_instance)) return false; @@ -27,7 +27,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - (*dml_ctx)->v21.mode_programming.programming = (struct dml2_display_cfg_programming *)kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); if (!((*dml_ctx)->v21.mode_programming.programming)) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 898b1dd69edd8..8ef7977841de0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -355,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, 
.scheduling_delay_us = 125, - .vertical_interrupt_ack_delay_us = 18, + .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 83fc15bf13cf7..25b607e7b726e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -88,6 +88,7 @@ struct dml2_display_arb_regs { uint32_t sdpif_request_rate_limit; uint32_t allow_sdpif_rate_limit_when_cstate_req; uint32_t dcfclk_deep_sleep_hysteresis; + uint32_t pstate_stall_threshold; }; struct dml2_cursor_dlg_regs{ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 4a46b21c3e554..ebd8abe894a9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -151,6 +151,7 @@ struct dml2_soc_bb { double phy_downspread_percent; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; bool do_urgent_latency_adjustment; unsigned int mem_word_bytes; unsigned int num_dcc_mcaches; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 1c773bbb99929..eeb96c4556584 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -5,7 +5,6 @@ #ifndef __DML_TOP_TYPES_H__ #define __DML_TOP_TYPES_H__ -#include "dml_top_types.h" #include "dml_top_display_cfg_types.h" #include "dml_top_soc_parameter_types.h" #include "dml_top_policy_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 9375c6ae11475..3d41ffde91c1b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -9,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -struct dml2_core_ip_params core_dcn4_ip_caps_base = { +static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, @@ -159,6 +159,7 @@ static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters phantom->timing.v_total = meta->v_total; phantom->timing.v_active = meta->v_active; phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; phantom->timing.drr_config.enabled = false; } @@ -273,7 +274,6 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in programming->fams2_required = display_cfg->stage3.fams2_required; dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); - programming->fams2_global_config.features.bits.enable = display_cfg->stage3.fams2_required; } // Only loop over all the main streams (the implicit svp streams will be 
packed as part of the main stream) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c3c4d8d9525ce..601320b1be817 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8,32 +8,56 @@ #include "dml2_debug.h" #include "lib_float_math.h" #include "dml_top_types.h" -#include "dml2_core_shared.h" -//#define DML_TVM_UPDATE_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE -static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { dml2_printf("DML: ===================================== \n"); dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = %d\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->ViewportSizeSupport == 0) + 
dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) @@ -42,74 +66,87 @@ static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported 
= %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = 
%d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); - if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->ModeSupport == 0) dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); dml2_printf("DML: ===================================== \n"); } @@ -2049,7 +2086,11 @@ static void CalculateDCCConfiguration( unsigned int full_swath_bytes_vert_wc_l; unsigned int full_swath_bytes_vert_wc_c; - yuv420 = 
dml_is_420(SourcePixelFormat); + if (dml_is_420(SourcePixelFormat)) + yuv420 = 1; + else + yuv420 = 0; + horz_div_l = 1; horz_div_c = 1; vert_div_l = 1; @@ -2517,8 +2558,11 @@ static void calculate_mcache_setting( l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2; // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c: - l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; - if (l->is_dual_plane) { + if (*p->num_mcaches_l) { + l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; + } + + if (l->is_dual_plane && *p->num_mcaches_c) { l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { @@ -2647,9 +2691,9 @@ static double dml_get_return_bandwidth_available( double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; - double derate_sdp_factor = 1; - double derate_fabric_factor = 1; - double derate_dram_factor = 1; + double derate_sdp_factor; + double derate_fabric_factor; + double derate_dram_factor; double derate_sdp_bandwidth; double derate_fabric_bandwidth; @@ -2849,16 +2893,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->hostvm_enable == true) { + if (p->display_cfg->gpuvm_enable == true) { p->vm_group_bytes[k] = 512; p->dpte_group_bytes[k] = 512; - } else if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 2048; - if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } } else { p->vm_group_bytes[k] = 0; p->dpte_group_bytes[k] = 0; @@ -3850,6 +3887,10 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; +#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE + if (p->NumberOfActiveSurfaces > 1) + *p->hw_debug5 = true; +#else for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) && p->display_cfg->plane_descriptors[k].surface.dcc.enable @@ -3865,6 +3906,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } +#endif } static enum dml2_odm_mode DecideODMMode(unsigned int HActive, @@ -4556,15 +4598,6 @@ static void calculate_tdlut_setting( return; } - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_per_group = 0; - return; - } - if (p->tdlut_mpc_width_flag) { tdlut_mpc_width = 33; tdlut_bytes_per_group_simple = 39*256; @@ -4624,7 +4657,7 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. 
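/*
 * The mcache hunk above now divides meta_row_width by num_mcaches only
 * after checking that the count is non-zero. A minimal sketch of the same
 * guard, with illustrative names:
 */
static double avg_mcache_element_size(double meta_row_width,
				      unsigned int num_mcaches)
{
	if (num_mcaches)
		return meta_row_width / num_mcaches;
	return 0.0;	/* leave the average alone instead of dividing by zero */
}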
if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } @@ -4637,7 +4670,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); @@ -4703,11 +4736,12 @@ static void CalculateTarb( static double CalculateTWait( long reserved_vblank_time_ns, double UrgentLatency, - double Ttrip) + double Ttrip, + double g6_temp_read_blackout_us) { double TWait; double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); @@ -4855,13 +4889,23 @@ static double get_urgent_bandwidth_required( } if (!exclude_this_plane) { - surface_required_bw[k] = math_max4(NumberOfDPP[k] * prefetch_vmrow_bw[k], - l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, - l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre, - (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]); + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + 
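/*
 * Restatement of the updated CalculateTWait() above: the reserved vblank
 * time now competes with the G6 temperature-read blackout, and the larger
 * of the two is added to max(urgent latency, trip time). Standalone
 * version using plain comparisons in place of the driver's math_max2():
 */
static double twait_us(long reserved_vblank_time_ns, double urgent_latency_us,
		       double ttrip_us, double g6_temp_read_blackout_us)
{
	double t_urg_trip = urgent_latency_us > ttrip_us ? urgent_latency_us : ttrip_us;
	double t_base = reserved_vblank_time_ns / 1000.0;

	if (g6_temp_read_blackout_us > t_base)
		t_base = g6_temp_read_blackout_us;
	return t_base + t_urg_trip;
}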
dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif } else { surface_required_bw[k] = 0.0; } @@ -4870,6 +4914,8 @@ static double get_urgent_bandwidth_required( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); @@ -4883,6 +4929,8 @@ static double get_urgent_bandwidth_required( dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); @@ -5021,6 +5069,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->trip_to_mem = 0.0; *p->Tvm_trips = 0.0; *p->Tr0_trips = 0.0; + s->Tvm_no_trip_oto = 0.0; + s->Tr0_no_trip_oto = 0.0; s->Tvm_trips_rounded = 0.0; s->Tr0_trips_rounded = 0.0; s->max_Tsw = 0.0; @@ -5037,7 +5087,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = 0.0; s->dep_bytes = 0.0; s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; @@ -5059,7 +5111,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); @@ -5092,21 +5143,15 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; s->trip_to_mem = p->Ttrip; -#ifdef DML_TVM_UPDATE_EN *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); if (dcc_mrq_enable) *p->Tvm_trips_flip = *p->Tvm_trips; else *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#else - 
*p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#endif *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled == true) { *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; @@ -5114,15 +5159,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tdmdl_vm = 0; *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex } -#else - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex - } -#endif if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { @@ -5186,7 +5222,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif - s->NoTimeToPrefetch = false; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); @@ -5199,14 +5234,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; } else { -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#endif + s->Tvm_trips_rounded = s->LineTime / 4.0; *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; } @@ -5235,16 +5266,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tno_bw = 0; } -#ifdef DML_TVM_UPDATE_EN if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) *p->Tno_bw_flip = *p->Tno_bw; else *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip -#else - *p->Tno_bw_flip = 0; - if (p->display_cfg->gpuvm_enable == true) - *p->Tno_bw_flip = *p->Tno_bw; -#endif if (dml_is_420(p->myPipe->SourcePixelFormat)) { s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; @@ -5266,6 +5291,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); @@ -5279,35 +5308,38 @@ static bool CalculatePrefetchSchedule(struct 
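/*
 * The *_rounded values in this hunk snap a time up to the next quarter of
 * a line time, since the DST_Y_* fields count in quarter lines. A
 * standalone sketch of that rounding, with libm ceil() standing in for the
 * driver's math_ceil2():
 */
#include <math.h>

static double round_up_to_quarter_line(double t_us, double line_time_us)
{
	return ceil(4.0 * t_us / line_time_us) / 4.0 * line_time_us;
}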
dml2_core_internal_scratch *scratch s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_oto = math_max3( - *p->Tvm_trips, + s->Tvm_no_trip_oto = math_max2( *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, s->LineTime / 4.0); + s->Tvm_oto = math_max2( + *p->Tvm_trips, + s->Tvm_no_trip_oto); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { -#ifdef DML_TVM_UPDATE_EN + s->Tvm_no_trip_oto = s->Tvm_trips_rounded; s->Tvm_oto = s->Tvm_trips_rounded; -#else - s->Tvm_oto = s->LineTime / 4.0; -#endif } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { - s->Tr0_oto = math_max3( - *p->Tr0_trips, + s->Tr0_no_trip_oto = math_max2( (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); + s->Tr0_oto = math_max2( + *p->Tr0_trips, + s->Tr0_no_trip_oto); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif - } else - s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 4.0; + } else { + s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0; + s->Tr0_oto = s->Tr0_no_trip_oto; + } s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; @@ -5317,19 +5349,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); //Tpre_equ in line time -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; -#else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; -#endif s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); @@ -5367,6 +5396,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; +#ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", 
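/*
 * dst_y_prefetch_equ above is clamped to 63.75 because DST_Y_PREFETCH is a
 * U6.2 fixed-point register field: 6 integer bits and 2 fractional bits,
 * so the largest representable value is 63 + 3/4. A hypothetical encoder
 * for such a field (not a driver function):
 */
static unsigned int encode_u6_2(double lines)
{
	if (lines > 63.75)
		lines = 63.75;
	if (lines < 0.0)
		lines = 0.0;
	return (unsigned int)(lines * 4.0 + 0.5);	/* quarter-line units, fits in 8 bits */
}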
__func__, s->dst_y_prefetch_equ); dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); @@ -5387,18 +5417,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - - s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); - dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - - if (s->prefetch_sw_bytes < s->dep_bytes) { - s->prefetch_sw_bytes = 2 * s->dep_bytes; - dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); - } + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif *p->dst_y_per_vm_vblank = 0; *p->dst_y_per_row_vblank = 0; @@ -5411,7 +5435,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - if (s->dst_y_prefetch_equ > 1) { + bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; @@ -5428,28 +5454,35 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, 
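/*
 * The new min_Lsw_equ_ok gate above rejects the equ schedule outright
 * unless the rounded prefetch window can hold the VM fetch, two row
 * fetches, and the minimum swath time. The predicate, restated over plain
 * parameters:
 */
#include <stdbool.h>

static bool min_lsw_equ_ok(double tpre_rounded_us, double tvm_trips_rounded_us,
			   double tr0_trips_rounded_us, double min_lsw_equ_lines,
			   double line_time_us)
{
	return tpre_rounded_us >= tvm_trips_rounded_us +
				  2.0 * tr0_trips_rounded_us +
				  min_lsw_equ_lines * line_time_us;
}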
s->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); - else + (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + } else s->prefetch_bw2 = 0; + dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + } + // prefetch_bw3: 2*R0 + SW if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / @@ -5459,8 +5492,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } @@ -5476,6 +5509,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, 
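/*
 * prefetch_bw1/bw2/bw3 in this hunk all follow one pattern: derive a
 * bandwidth from the full byte budget, estimate the swath time it implies,
 * and if that estimate falls below the minimum swath time, recompute the
 * bandwidth over the window left after reserving min_Lsw lines. Generic
 * sketch only; the driver additionally subtracts fixed fractions of a line
 * time (0.25/0.5/0.75) per case:
 */
static double refit_prefetch_bw(double bytes, double sw_bytes,
				double window_us, double reserved_us,
				double min_tsw_us)
{
	double avail = window_us - reserved_us;
	double bw, tsw_est;

	if (avail <= 0.0)
		return 0.0;
	bw = bytes / avail;
	tsw_est = sw_bytes / bw;
	if (tsw_est < min_tsw_us && avail - min_tsw_us > 0.0)
		bw = (bytes - sw_bytes) / (avail - min_tsw_us);
	return bw;
}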
(s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); @@ -5496,9 +5530,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // here is to make sure equ bw wont be more agressive than the latency-based requirement. // check vm time >= vm_trips // check r0 time >= r0_trips + + double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + if (s->prefetch_bw1 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; + double row_transfer_time = total_row_bytes / s->prefetch_bw1; + dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } } @@ -5508,8 +5551,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time >= vm_trips // check r0 time < r0_trips if (s->prefetch_bw2 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; + double row_transfer_time = total_row_bytes / s->prefetch_bw2; + dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } } @@ -5518,8 +5564,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time < vm_trips // check r0 time >= r0_trips if (s->prefetch_bw3 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; + double row_transfer_time = total_row_bytes / s->prefetch_bw3; + dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } } @@ -5568,6 +5617,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_equ 
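/*
 * The Case1OK/Case2OK/Case3OK checks above now share two derived times
 * instead of repeating the long expressions inline. Sketch of the shared
 * computation (names mirror the locals the hunks introduce):
 */
static void transfer_times(double tno_bw_us, double scaled_vm_bytes,
			   double total_row_bytes, double prefetch_bw,
			   double *vm_transfer_time_us,
			   double *row_transfer_time_us)
{
	*vm_transfer_time_us = tno_bw_us + scaled_vm_bytes / prefetch_bw;
	*row_transfer_time_us = total_row_bytes / prefetch_bw;
}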
= %f\n", __func__, s->Tvm_equ); dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0; + // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { *p->dst_y_prefetch = s->dst_y_prefetch_oto; @@ -5576,29 +5628,28 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0; + s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif - } else { *p->dst_y_prefetch = s->dst_y_prefetch_equ; s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - if (p->VStartup == p->MaxVStartup) { - *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } else { - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank; + s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } - // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) - s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw + /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */ + s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); @@ -5635,7 +5686,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + dml2_printf("DML::%s: No time to prefetch!. 
LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5658,7 +5709,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5680,14 +5731,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); @@ -5698,7 +5748,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5717,26 +5769,26 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (vm_bytes == 0) { prefetch_vm_bw = 0; - } else if (*p->dst_y_per_vm_vblank > 0) { + } else if (s->dst_y_per_vm_no_trip_vblank > 0) { #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif - prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { prefetch_row_bw = 0; - } else if (*p->dst_y_per_row_vblank > 0) { - prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); + } else if (s->dst_y_per_row_no_trip_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); @@ -5746,7 +5798,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: No time to prefetch!. 
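/*
 * prefetch_vm_bw and prefetch_row_bw above now divide by the no-trip
 * quarter-line counts, i.e. the vblank time actually available for the
 * transfer with the round-trip component excluded. Sketch of the vm-side
 * computation; the zero case is where the caller sets NoTimeToPrefetch:
 */
static double prefetch_vm_bw_sketch(double vm_bytes, double hostvm_inefficiency,
				    double dst_y_per_vm_no_trip_vblank,
				    double line_time_us)
{
	if (dst_y_per_vm_no_trip_vblank <= 0.0)
		return 0.0;
	return vm_bytes * hostvm_inefficiency /
	       (dst_y_per_vm_no_trip_vblank * line_time_us);
}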
dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5763,11 +5815,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; } dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; } @@ -6174,7 +6231,7 @@ static void CalculateFlipSchedule( { struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ @@ -6250,7 +6307,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); @@ -6261,6 +6318,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6277,7 +6335,7 @@ static void CalculateFlipSchedule( *dst_y_per_vm_flip = 1; // not used *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = true; + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) @@ -6343,6 +6401,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: dst_y_per_row_flip = 
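/*
 * In the CalculateFlipSchedule() hunk above, the mode-support path stops
 * reporting immediate flip as unconditionally supported: the minimum row
 * time must now cover the rounded VM trip plus two rounded row trips. The
 * predicate in isolation:
 */
#include <stdbool.h>

static bool iflip_supported(double min_row_time_us,
			    double tvm_trips_flip_rounded_us,
			    double tr0_trips_flip_rounded_us)
{
	return min_row_time_us >= tvm_trips_flip_rounded_us +
				  2.0 * tr0_trips_flip_rounded_us;
}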
%f (should be < 16)\n", __func__, *dst_y_per_row_flip); dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); @@ -6373,6 +6432,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ @@ -6579,13 +6644,13 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; - if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); else p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); @@ -6850,7 +6915,8 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { +static const struct dml2_core_internal_g6_temp_read_blackouts_table + core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, @@ -6915,6 +6981,21 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned 
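/*
 * Sketch of the DCN4x-only watermark adjustment above: the four stutter
 * watermarks are all padded by the same max-urgent-latency plus
 * DF-response-time sum. Hypothetical helper over plain doubles rather than
 * the driver's watermark struct:
 */
static void pad_stutter_watermarks(double *watermarks_us, unsigned int count,
				   double max_urgent_latency_us,
				   double df_response_time_us)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		watermarks_us[i] += max_urgent_latency_us + df_response_time_us;
}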
int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + static void calculate_pstate_keepout_dst_lines( const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_watermarks *watermarks, @@ -6997,7 +7078,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); @@ -7142,7 +7222,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7223,12 +7303,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } #endif */ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); if (mode_lib->ms.BytePerPixelC[k] == 0.0) { mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); } @@ -7310,7 +7390,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ViewportExceedsSurface = false; if 
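/*
 * get_max_urgent_latency_us() above as a single expression: the UCLK-domain
 * cycle count is padded by the UMC latency margin and the fabric round-trip
 * term by the fabric transport margin, both expressed in percent; cycle
 * counts divided by a MHz clock give microseconds. Parameter names below
 * are illustrative:
 */
static double max_urgent_latency_sketch_us(double urgent_uclk_cycles,
					   double uclk_mhz,
					   double umc_margin_pct,
					   double mall_overhead_fclk_cycles,
					   double round_trip_fclk_cycles,
					   double fabric_margin_pct,
					   double fclk_mhz)
{
	return urgent_uclk_cycles / uclk_mhz * (1.0 + umc_margin_pct / 100.0) +
	       mall_overhead_fclk_cycles / fclk_mhz +
	       round_trip_fclk_cycles / fclk_mhz *
			(1.0 + fabric_margin_pct / 100.0);
}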
(!display_cfg->overrides.hw.surface_viewport_size_check_disable) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); @@ -7319,11 +7400,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; } } } @@ -7599,7 +7680,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7684,8 +7765,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) mode_lib->ms.support.P2IWith420 = true; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) - 
mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) mode_lib->ms.support.DSC422NativeNotSupported = true; @@ -8483,7 +8562,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out { mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - calculate_hostvm_inefficiency_factor( &s->HostVMInefficiencyFactor, &s->HostVMInefficiencyFactorPrefetch, @@ -8568,7 +8646,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory); + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; @@ -8615,7 +8695,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -8697,8 +8776,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -8711,20 +8790,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.VRatioInPrefetchSupported = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + 
mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { @@ -8961,6 +9035,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->mSOCParameters.USRRetrainingLatency = 0; s->mSOCParameters.SMNLatency = 0; s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -8980,7 +9057,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? 
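/*
 * Editor's sketch (not patch content): the get_max_urgent_latency_us()
 * helper introduced above folds three QoS terms into one worst-case
 * figure: the per-DPM urgent UCLK cycle count, the MALL overhead on
 * FCLK, and the round-trip-to-furthest-CS FCLK cycles, with the UMC and
 * fabric latency margins applied as percentages. A minimal standalone
 * version of the same arithmetic, with shortened field names and
 * made-up example values; a frequency in MHz is cycles per microsecond,
 * so cycles / MHz yields microseconds directly.
 */
#include <stdio.h>

struct qos_terms {
	double urgent_uclk_cycles;	/* maximum_latency_when_urgent_uclk_cycles */
	double umc_margin_pct;		/* umc_max_latency_margin */
	double mall_fclk_cycles;	/* mall_overhead_fclk_cycles */
	double round_trip_fclk_cycles;	/* max_round_trip_to_furthest_cs_fclk_cycles */
	double fabric_margin_pct;	/* fabric_max_transport_latency_margin */
};

static double max_urgent_latency_us(const struct qos_terms *q,
				    double uclk_mhz, double fclk_mhz)
{
	/* margins scale only the UCLK urgent term and the fabric round trip */
	return q->urgent_uclk_cycles / uclk_mhz * (1.0 + q->umc_margin_pct / 100.0)
	     + q->mall_fclk_cycles / fclk_mhz
	     + q->round_trip_fclk_cycles / fclk_mhz * (1.0 + q->fabric_margin_pct / 100.0);
}

int main(void)
{
	struct qos_terms q = { 190.0, 12.5, 50.0, 350.0, 12.5 };

	/* hypothetical operating point: UCLK 1000 MHz, FCLK 1500 MHz */
	printf("max urgent latency = %.3f us\n",
	       max_urgent_latency_us(&q, 1000.0, 1500.0));
	return 0;
}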
CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; @@ -9011,22 +9087,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); } - + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; //Re-ordering Buffer Support Check - mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9055,15 +9124,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ { - // s->dram_clock_change_support = 1; - // s->f_clock_change_support = 1; - if (mode_lib->ms.support.ScaleRatioAndTapsSupport && mode_lib->ms.support.SourceFormatPixelAndScanSupport && mode_lib->ms.support.ViewportSizeSupport @@ -9074,9 +9140,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMultistreamSlots && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink && !mode_lib->ms.support.NotEnoughLanesForMSO - //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP && !mode_lib->ms.support.DSC422NativeNotSupported && mode_lib->ms.support.DSCSlicesODMModeSupported && !mode_lib->ms.support.NotEnoughDSCUnits @@ -9144,7 +9208,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.ModeSupport) - dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_print_mode_support_info(&mode_lib->ms.support, true); 
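/*
 * Editor's sketch (not patch content): the re-ordering buffer check
 * above was rewritten to reuse s->mSOCParameters.max_urgent_latency_us
 * instead of recomputing the latency inline. The predicate itself is a
 * drain-time comparison: the ROB headroom in bytes, divided by the
 * required SDP bandwidth, must cover the worst-case urgent latency.
 * Names below are illustrative, and the units assume DML's convention
 * of bandwidth in MB/s, i.e. bytes per microsecond.
 */
#include <stdbool.h>
#include <stdio.h>

static bool rob_supported(unsigned int rob_kbytes, unsigned int chunk_kbytes,
			  double required_sdp_mbps, double max_urgent_latency_us)
{
	/* how long the ROB can absorb return data at the required rate */
	double drain_time_us = (rob_kbytes - chunk_kbytes) * 1024.0 / required_sdp_mbps;

	return drain_time_us >= max_urgent_latency_us;
}

int main(void)
{
	/* e.g. 128 KiB ROB, 8 KiB pixel chunk, 16 GB/s SDP, 4.2 us latency */
	printf("ROBSupport = %d\n", rob_supported(128, 8, 16000.0, 4.2));
	return 0;
}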
dml2_printf("DML::%s: --- DONE --- \n", __func__); #endif @@ -9163,6 +9227,10 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -10697,7 +10765,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory); + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10743,7 +10813,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -10829,9 +10898,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.dst_y_prefetch[k] < 2) s->DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; + dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); @@ -11165,6 +11238,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; 
s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -11184,7 +11260,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; @@ -11515,9 +11590,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { + dml2_printf("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: ------------- START ----------\n", __func__); dml2_printf("DML::%s: result = %0d\n", __func__, result); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -12167,6 +12242,8 @@ static void rq_dlg_get_dlg_reg( static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) { + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? 
(double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; @@ -12178,6 +12255,7 @@ static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, co arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes; arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); @@ -12427,7 +12505,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_shared_div_rem(phantom_processing_delay_pix, + dml2_core_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); if (rem) @@ -12470,7 +12548,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 640087e862f84..28394de028855 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -10,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h 
deleted file mode 100644 index f3356b072b59e..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ /dev/null @@ -1,37 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML2_CORE_SHARED_H__ -#define __DML2_CORE_SHARED_H__ - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 // 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + 
dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + 
dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + dml2_printf("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] 
= bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + 
sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.v_blank_end = phantom->timing.v_total - phantom->timing.v_front_porch - phantom->timing.v_active; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = 
dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + 
create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 0000000000000..a5cc6a07167ae --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index f19f6ebaae132..8869ea0893128 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -203,6 +203,26 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma 
return true; } +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) { bool result = false; @@ -555,31 +575,39 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // but still the required dispclk can be more than the maximum dispclk speed: dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); - // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } - - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); - - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); - } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); + + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, 
&in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); + + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index dfd01440737df..3861bc6c96219 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -20,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 8e68a8094658f..a31db5742675d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -497,7 +497,6 @@ bool pmo_dcn3_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in in_out->cfg_support_info->plane_support_info[i].dpps_used)) { result = false; } else { - free_pipes -= planes_on_stream; break; } } else { @@ -666,7 +665,7 @@ bool pmo_dcn3_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_su struct dml2_pmo_instance *pmo = in_out->instance; unsigned int stream_index; bool success = false; - bool reached_end = true; + bool reached_end; memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 68b333b689337..92269f0e50ed2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -8,6 +8,7 @@ #include "dml2_pmo_dcn4_fams2.h" static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding +static const double MIN_BLANK_STUTTER_FACTOR = 3.0; static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred @@ -334,7 +335,6 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o in_out->cfg_support_info->plane_support_info[i].dpps_used)) { result = false; } else { - free_pipes -= planes_on_stream; break; } } else { @@ -672,8 +672,6 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) /* populate list */ expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4); break; - default: - break; } } @@ -718,7 +716,7 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; struct dml2_optimization_stage4_state *state = - &in_out->base_display_config->stage4; + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -943,8 +941,11 @@ static void build_synchronized_timing_groups( /* find synchronizable timing groups */ for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0) { + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0 && + display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && + (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match + display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } @@ -1444,7 +1445,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && - (pmo->options->disable_drr_clamped || + (pmo->options->disable_drr_clamped || (!stream_descriptor->timing.drr_config.enabled || (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || (pmo->options->disable_drr_clamped_when_var_active && @@ -1798,6 +1799,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp } if (s->pmo_dcn4.num_pstate_candidates > 0) { + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates - 1].allow_state_increase = true; s->pmo_dcn4.cur_pstate_candidate = -1; return true; } else { @@ -1910,7 
+1912,8 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met if (is_bit_set_in_bitfield(plane_mask, plane_index)) { plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; @@ -2138,6 +2141,7 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in struct dml2_pmo_instance *pmo = in_out->instance; bool stutter_period_meets_z8_eco = true; bool z8_stutter_optimization_too_expensive = false; + bool stutter_optimization_too_expensive = false; double line_time_us, vblank_nom_time_us; unsigned int i; @@ -2159,10 +2163,15 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in line_time_us = (double)in_out->base_display_config->display_config.stream_descriptors[i].timing.h_total / (in_out->base_display_config->display_config.stream_descriptors[i].timing.pixel_clock_khz * 1000) * 1000000; vblank_nom_time_us = line_time_us * in_out->base_display_config->display_config.stream_descriptors[i].timing.vblank_nom; - if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { z8_stutter_optimization_too_expensive = true; break; } + + if (vblank_nom_time_us < pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * MIN_BLANK_STUTTER_FACTOR) { + stutter_optimization_too_expensive = true; + break; + } } pmo->scratch.pmo_dcn4.num_stutter_candidates = 0; @@ -2178,7 +2187,7 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in pmo->scratch.pmo_dcn4.z8_vblank_optimizable = false; } - if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { + if (!stutter_optimization_too_expensive && pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0) { pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.num_stutter_candidates] = (unsigned int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us; pmo->scratch.pmo_dcn4.num_stutter_candidates++; } @@ -2196,15 +2205,15 @@ bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in unsigned int i; - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { success = false; break; } if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - 
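As context for the MIN_BLANK_STUTTER_FACTOR screening above: a stutter candidate is only offered when the nominal vblank time comfortably exceeds the stutter latency, so entry/exit overhead cannot consume the whole blank. A self-contained sketch of that check, assuming illustrative timing numbers rather than real stream descriptors:

#include <stdbool.h>
#include <stdio.h>

/* Same screening rule as above: require the vblank_nom time to be at
 * least MIN_BLANK_STUTTER_FACTOR (3x) the stutter latency. */
static bool stutter_worth_it(unsigned int h_total, unsigned int pixel_clock_khz,
			     unsigned int vblank_nom_lines, double latency_us)
{
	double line_time_us = (double)h_total / pixel_clock_khz * 1000.0;
	double vblank_nom_time_us = line_time_us * vblank_nom_lines;

	return vblank_nom_time_us >= latency_us * 3.0;
}

int main(void)
{
	/* roughly a 4k timing: h_total 4400, 594 MHz pixel clock, 90 blank lines */
	printf("offer stutter: %d\n", stutter_worth_it(4400, 594000, 90, 200.0));
	return 0;
}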
in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { success = false; break; } @@ -2223,8 +2232,11 @@ bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in if (!in_out->last_candidate_failed) { if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { - in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); } success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index 95f716e2641f4..add51d41a5158 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -26,7 +26,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index e17b5ceba4471..3bb835b5585ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -4,7 +4,9 @@ #include "lib_float_math.h" +#ifndef ASSERT #define ASSERT(condition) +#endif #define isNaN(number) ((number) != (number)) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index dc8af4dd04108..d0e026d981b50 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -219,7 +219,6 @@ bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; lowest_state = 0; - cur_state = 0; while (highest_state > lowest_state) { cur_state = (highest_state + lowest_state) / 2; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 6eccf0241d857..1ed21c1b86a5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -258,12 +258,25 @@ static unsigned int find_preferred_pipe_candidates(const struct dc_state *existi * However this condition comes with a caveat. We need to ignore pipes that will * require a change in OPP but still have the same stream id. For example during * an MPC to ODM transition. + * + * Add a check to avoid selecting the head pipe, by using the dc resource + * helper function resource_get_primary_dpp_pipe and comparing the pipe index. */ if (existing_state) { for (i = 0; i < pipe_count; i++) { if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // always exclude the head pipe from candidate selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && - existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) + existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i && + (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe || + existing_state->res_ctx.pipe_ctx[i].next_odm_pipe)) continue; preferred_pipe_candidates[num_preferred_candidates++] = i; @@ -292,6 +305,14 @@ static unsigned int find_last_resort_pipe_candidates(const struct dc_state *exis */ if (existing_state) { for (i = 0; i < pipe_count; i++) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ?
+ resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // always exclude the head pipe from candidate selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) || existing_state->res_ctx.pipe_ctx[i].stream_res.tg) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index 3ba184be25d38..140ec01545db8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,7 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; - unsigned int dp2_mst_stream_count; + int hpo_stream_to_link_encoder_mapping[MAX_HPO_DP2_ENCODERS]; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c index c4c52173ef224..11c904ae29586 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c @@ -303,7 +303,6 @@ void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_m if (project == dml_project_dcn35 || project == dml_project_dcn351) { policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; - policy->EnhancedPrefetchScheduleAccelerationFinal = 0; policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ policy->UseOnlyMaxPrefetchModes = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 7e39873832bfc..bde4250853b10 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,8 +733,7 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe, - unsigned int dp2_mst_stream_count) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, struct dml2_context *dml2) { unsigned int output_bpc; @@ -747,8 +746,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) - out->OutputEncoder[location] = dml_dp2p0; + if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: out->OutputEncoder[location] = dml_edp; @@ -1199,36 +1198,6 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, } } -static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) -{ - int i, j; - unsigned int dp2_mst_stream_count = 0; - - for (i = 0; i < context->stream_count; i++) { - struct dc_stream_state *stream = context->streams[i]; - - if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) - continue; - - for (j = 0; j < MAX_PIPES; j++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - - if (!pipe_ctx
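The head-pipe guard added to both candidate scans above follows one pattern; a distilled sketch of the intent, using simplified stand-in types instead of the real pipe_ctx:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Sketch of the guard above: if pipe i resolves to itself as the head
 * (primary DPP) pipe of its tree, it must not be offered as a candidate. */
struct pipe {
	int pipe_idx;
	bool is_dpp;
	struct pipe *head;
};

static bool is_selectable_candidate(const struct pipe *p, int i)
{
	const struct pipe *head_pipe = p->is_dpp ? p->head : NULL;

	if (head_pipe && head_pipe->pipe_idx == i)
		return false;	/* never select the head pipe */
	return true;
}

int main(void)
{
	struct pipe head = { .pipe_idx = 1, .is_dpp = true };

	head.head = &head;	/* pipe 1 is its own head */
	printf("pipe 1 selectable: %d\n", is_selectable_candidate(&head, 1));
	return 0;
}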
|| !pipe_ctx->stream) - continue; - - if (stream != pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { - dp2_mst_stream_count++; - break; - } - } - } - - return dp2_mst_stream_count; -} - static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1269,6 +1238,30 @@ static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cf } } } + +static void dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(struct dml2_context *dml2, struct dc_state *context) +{ + int i; + struct pipe_ctx *current_pipe_context; + + /* Scratch gets reset to zero in dml, but link encoder instance can be zero, so reset to -1 */ + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[i] = -1; + } + + /* If an HPO stream encoder is allocated to a pipe, get the instance of its allocated HPO Link encoder */ + for (i = 0; i < MAX_PIPES; i++) { + current_pipe_context = &context->res_ctx.pipe_ctx[i]; + if (current_pipe_context->stream && + current_pipe_context->stream_res.hpo_dp_stream_enc && + current_pipe_context->link_res.hpo_dp_link_enc && + dc_is_dp_signal(current_pipe_context->stream->signal)) { + dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[current_pipe_context->stream_res.hpo_dp_stream_enc->inst] = + current_pipe_context->link_res.hpo_dp_link_enc->inst; + } + } +} + void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg) { int i = 0, j = 0, k = 0; @@ -1291,8 +1284,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; - dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); + dml2_map_hpo_stream_encoder_to_hpo_link_encoder_index(dml2, context); for (i = 0; i < context->stream_count; i++) { current_pipe_context = NULL; @@ -1313,7 +1306,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); /* Call site for populate_dml_writeback_cfg_from_stream_state */ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1378,7 +1371,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i],
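The mapping helper above replaces the global DP2 MST stream count with a per-encoder table. The core idea is a sentinel-initialized index map: link encoder instance 0 is valid, so "unused" must be -1 rather than 0. A toy sketch under that assumption:

#include <stdio.h>

#define MAX_ENCODERS 4	/* stand-in for MAX_HPO_DP2_ENCODERS */

int main(void)
{
	int map[MAX_ENCODERS];
	int i;

	for (i = 0; i < MAX_ENCODERS; i++)
		map[i] = -1;	/* -1 = no link encoder assigned */

	map[2] = 0;		/* stream encoder 2 drives link encoder 0 */

	for (i = 0; i < MAX_ENCODERS; i++)
		if (map[i] != -1)
			printf("stream enc %d -> link enc %d\n", i, map[i]);
	return 0;
}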
current_pipe_context, dml2); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index 55659b22d87f7..d764773938f4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 9e8ff3a9718e7..9a33158b63bf8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -161,14 +161,6 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_m /* If this assert is hit then we have a link encoder dynamic management issue */ ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? 
pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); - /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal) && - (pipe_ctx->stream->link->remote_sinks[0]->sink_id == pipe_ctx->stream->sink->sink_id)); - } - return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal)); @@ -181,7 +173,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) return true; } return false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 866b0abcff1ba..4d64c45930da4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -533,14 +533,21 @@ static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct d static bool call_dml_mode_support_and_programming(struct dc_state *context) { unsigned int result = 0; - unsigned int min_state; + unsigned int min_state = 0; int min_state_for_g6_temp_read = 0; + + + if (!context) + return false; + struct dml2_context *dml2 = context->bw_ctx.dml2; struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); - ASSERT(min_state_for_g6_temp_read >= 0); + ASSERT(min_state_for_g6_temp_read >= 0); + } if (!dml2->config.use_native_pstate_optimization) { result = optimize_pstate_with_svp_and_drr(dml2, context); @@ -551,14 +558,20 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) /* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state causing crashes later on. * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. */ - if (min_state_for_g6_temp_read >= 0) - min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; - else - min_state = s->mode_support_params.out_lowest_state_idx; - - if (result) - result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + if (min_state_for_g6_temp_read >= 0) + min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ?
min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } return result; } @@ -687,6 +700,8 @@ static bool dml2_validate_only(struct dc_state *context) build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); result = pack_and_call_dml_mode_support_ex(dml2, &dml2->v20.scratch.cur_display_config, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h index cd1706d301e77..f09cba8e29cce 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h @@ -690,6 +690,7 @@ struct dcn20_dpp { int lb_memory_size; int lb_bits_per_entry; bool is_write_to_ram_a_safe; + bool dispclk_r_gate_disable; struct scaler_data scl_data; struct pwl_params pwl_data; }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h index b110f35ef66bd..f236824126e94 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h @@ -572,6 +572,7 @@ struct dcn3_dpp { int lb_memory_size; int lb_bits_per_entry; bool is_write_to_ram_a_safe; + bool dispclk_r_gate_disable; struct scaler_data scl_data; struct pwl_params pwl_data; }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index 8473c694bfdc2..62b7012cda430 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -50,13 +50,21 @@ void dpp35_dppclk_control( DPPCLK_RATE_CONTROL, dppclk_div, DPP_CLOCK_ENABLE, 1); else - REG_UPDATE_2(DPP_CONTROL, + if (dpp->dispclk_r_gate_disable) + REG_UPDATE_2(DPP_CONTROL, DPP_CLOCK_ENABLE, 1, DISPCLK_R_GATE_DISABLE, 1); + else + REG_UPDATE(DPP_CONTROL, + DPP_CLOCK_ENABLE, 1); } else - REG_UPDATE_2(DPP_CONTROL, + if (dpp->dispclk_r_gate_disable) + REG_UPDATE_2(DPP_CONTROL, DPP_CLOCK_ENABLE, 0, DISPCLK_R_GATE_DISABLE, 0); + else + REG_UPDATE(DPP_CONTROL, + DPP_CLOCK_ENABLE, 0); } void dpp35_program_bias_and_scale_fcnv( @@ -128,6 +136,10 @@ bool dpp35_construct( (const struct dcn3_dpp_mask *)(tf_mask)); dpp->base.funcs = &dcn35_dpp_funcs; + + // w/a for cursor memory stuck in LS by programming DISPCLK_R_GATE_DISABLE, limit w/a to some ASIC revs + if (dpp->base.ctx->asic_id.hw_internal_rev <= 0x10) + dpp->dispclk_r_gate_disable = true; return ret; } diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 92b34fe47f740..3b6ca7974e188 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,11 +120,10 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 
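The DCN35 hunk above decides the DISPCLK_R_GATE_DISABLE workaround once, at construct time, from the ASIC revision, and the dppclk control path then branches on the cached flag. Schematically, with simplified types and the revision bound taken from the hunk:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the construct-time gating above: affected revisions keep the
 * DISPCLK R-gate disabled so cursor memory cannot get stuck in light sleep. */
struct dpp35 {
	unsigned int hw_internal_rev;
	bool dispclk_r_gate_disable;
};

static void dpp35_apply_gate_workaround(struct dpp35 *dpp)
{
	if (dpp->hw_internal_rev <= 0x10)	/* affected ASIC revs */
		dpp->dispclk_r_gate_disable = true;
}

int main(void)
{
	struct dpp35 dpp = { .hw_internal_rev = 0x0f, .dispclk_r_gate_disable = false };

	dpp35_apply_gate_workaround(&dpp);
	printf("workaround %s\n", dpp.dispclk_r_gate_disable ? "on" : "off");
	return 0;
}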
0; + // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { - cur_rom_en = 1; - } + cur_rom_en = 1; } REG_UPDATE_3(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 505929800426d..2f92e7d4981ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -280,7 +280,8 @@ static void dpp401_dscl_set_scaler_filter( static void dpp401_dscl_set_scl_filter( struct dcn401_dpp *dpp, const struct scaler_data *scl_data, - bool chroma_coef_mode) + bool chroma_coef_mode, + bool force_coeffs_update) { bool h_2tap_hardcode_coef_en = false; bool v_2tap_hardcode_coef_en = false; @@ -343,7 +344,7 @@ static void dpp401_dscl_set_scl_filter( || (filter_v_c && (filter_v_c != dpp->filter_v_c)); } - if (filter_updated) { + if ((filter_updated) || (force_coeffs_update)) { uint32_t scl_mode = REG_READ(SCL_MODE); if (!h_2tap_hardcode_coef_en && filter_h) { @@ -656,274 +657,252 @@ static void dpp401_dscl_set_recout(struct dcn401_dpp *dpp, RECOUT_HEIGHT, recout->height); } /** - * dpp401_dscl_program_easf - Program EASF + * dpp401_dscl_program_easf_v - Program EASF_V * * @dpp_base: High level DPP struct * @scl_data: scaler_data info * - * This is the primary function to program EASF + * This is the primary function to program vertical EASF registers * */ -static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data) +static void dpp401_dscl_program_easf_v(struct dpp *dpp_base, const struct scaler_data *scl_data) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); PERF_TRACE(); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode); - REG_UPDATE(DSCL_SC_MODE, - SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); /* DSCL_EASF_V_MODE */ - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en); - REG_UPDATE(DSCL_EASF_V_MODE, - SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor); - REG_UPDATE(DSCL_EASF_V_MODE, + REG_SET_3(DSCL_EASF_V_MODE, 0, + SCL_EASF_V_EN, scl_data->dscl_prog_data.easf_v_en, + SCL_EASF_V_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_v_sharp_factor, SCL_EASF_V_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_v_ring); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, - SCL_EASF_V_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_V_BF_CNTL, + + if (!scl_data->dscl_prog_data.easf_v_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_V_BF_CNTL */ + REG_SET_6(DSCL_EASF_V_BF_CNTL, 0, + SCL_EASF_V_BF1_EN, scl_data->dscl_prog_data.easf_v_bf1_en, + SCL_EASF_V_BF2_MODE, scl_data->dscl_prog_data.easf_v_bf2_mode, + SCL_EASF_V_BF3_MODE, scl_data->dscl_prog_data.easf_v_bf3_mode, + SCL_EASF_V_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_v_bf2_flat1_gain, + SCL_EASF_V_BF2_FLAT2_GAIN,
scl_data->dscl_prog_data.easf_v_bf2_flat2_gain, SCL_EASF_V_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_v_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, - SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL1, + /* DSCL_EASF_V_RINGEST_3TAP_CNTLn */ + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL1, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_UPTILT, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_uptilt, SCL_EASF_V_RINGEST_3TAP_UPTILT_MAXVAL, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt_max); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, - SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL2, + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL2, 0, + SCL_EASF_V_RINGEST_3TAP_DNTILT_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_dntilt_slope, SCL_EASF_V_RINGEST_3TAP_UPTILT1_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt1_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, - SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope); - REG_UPDATE(DSCL_EASF_V_RINGEST_3TAP_CNTL3, + REG_SET_2(DSCL_EASF_V_RINGEST_3TAP_CNTL3, 0, + SCL_EASF_V_RINGEST_3TAP_UPTILT2_SLOPE, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_slope, SCL_EASF_V_RINGEST_3TAP_UPTILT2_OFFSET, scl_data->dscl_prog_data.easf_v_ringest_3tap_uptilt2_offset); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, - SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, + /* DSCL_EASF_V_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_V_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg1, SCL_EASF_V_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_v_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, - SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, + /* DSCL_EASF_V_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_V_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_V_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain1, SCL_EASF_V_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_v_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, - SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina); - REG_UPDATE(DSCL_EASF_V_BF_FINAL_MAX_MIN, + /* DSCL_EASF_V_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_V_BF_FINAL_MAX_MIN, 0, + SCL_EASF_V_BF_MAXA, scl_data->dscl_prog_data.easf_v_bf_maxa, + SCL_EASF_V_BF_MAXB, scl_data->dscl_prog_data.easf_v_bf_maxb, + SCL_EASF_V_BF_MINA, scl_data->dscl_prog_data.easf_v_bf_mina, SCL_EASF_V_BF_MINB, scl_data->dscl_prog_data.easf_v_bf_minb); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, - SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG0, + /* DSCL_EASF_V_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG0, 0, + SCL_EASF_V_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg0, + 
SCL_EASF_V_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg0, SCL_EASF_V_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, - SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG1, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG1, 0, + SCL_EASF_V_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg1, + SCL_EASF_V_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg1, SCL_EASF_V_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, - SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG2, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG2, 0, + SCL_EASF_V_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg2, + SCL_EASF_V_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg2, SCL_EASF_V_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, - SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG3, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG3, 0, + SCL_EASF_V_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg3, + SCL_EASF_V_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg3, SCL_EASF_V_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, - SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG4, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG4, 0, + SCL_EASF_V_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg4, + SCL_EASF_V_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg4, SCL_EASF_V_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, - SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG5, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG5, 0, + SCL_EASF_V_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg5, + SCL_EASF_V_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg5, SCL_EASF_V_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, - SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG6, + REG_SET_3(DSCL_EASF_V_BF1_PWL_SEG6, 0, + SCL_EASF_V_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg6, + SCL_EASF_V_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg6, SCL_EASF_V_BF1_PWL_SLOPE_SEG6, 
scl_data->dscl_prog_data.easf_v_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, - SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_V_BF1_PWL_SEG7, + REG_SET_2(DSCL_EASF_V_BF1_PWL_SEG7, 0, + SCL_EASF_V_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_in_seg7, SCL_EASF_V_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_v_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, - SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG0, + /* DSCL_EASF_V_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG0, 0, + SCL_EASF_V_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set0, + SCL_EASF_V_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set0, SCL_EASF_V_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, - SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG1, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG1, 0, + SCL_EASF_V_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set1, + SCL_EASF_V_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set1, SCL_EASF_V_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, - SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG2, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG2, 0, + SCL_EASF_V_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set2, + SCL_EASF_V_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set2, SCL_EASF_V_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, - SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG3, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG3, 0, + SCL_EASF_V_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set3, + SCL_EASF_V_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set3, SCL_EASF_V_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, - SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG4, + REG_SET_3(DSCL_EASF_V_BF3_PWL_SEG4, 0, + SCL_EASF_V_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set4, + SCL_EASF_V_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set4, SCL_EASF_V_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_v_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, - SCL_EASF_V_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_V_BF3_PWL_SEG5, + REG_SET_2(DSCL_EASF_V_BF3_PWL_SEG5, 0, + SCL_EASF_V_BF3_PWL_IN_SEG5, 
scl_data->dscl_prog_data.easf_v_bf3_pwl_in_set5, SCL_EASF_V_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_v_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf_h - Program EASF_H + * + * @dpp_base: High level DPP struct + * @scl_data: scaler_data info + * + * This is the primary function to program horizontal EASF registers + * + */ +static void dpp401_dscl_program_easf_h(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); /* DSCL_EASF_H_MODE */ - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en); - REG_UPDATE(DSCL_EASF_H_MODE, - SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor); - REG_UPDATE(DSCL_EASF_H_MODE, + REG_SET_3(DSCL_EASF_H_MODE, 0, + SCL_EASF_H_EN, scl_data->dscl_prog_data.easf_h_en, + SCL_EASF_H_2TAP_SHARP_FACTOR, scl_data->dscl_prog_data.easf_h_sharp_factor, SCL_EASF_H_RINGEST_FORCE_EN, scl_data->dscl_prog_data.easf_h_ring); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, - SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain); - REG_UPDATE(DSCL_EASF_H_BF_CNTL, + + if (!scl_data->dscl_prog_data.easf_h_en) { + PERF_TRACE(); + return; + } + + /* DSCL_EASF_H_BF_CNTL */ + REG_SET_6(DSCL_EASF_H_BF_CNTL, 0, + SCL_EASF_H_BF1_EN, scl_data->dscl_prog_data.easf_h_bf1_en, + SCL_EASF_H_BF2_MODE, scl_data->dscl_prog_data.easf_h_bf2_mode, + SCL_EASF_H_BF3_MODE, scl_data->dscl_prog_data.easf_h_bf3_mode, + SCL_EASF_H_BF2_FLAT1_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat1_gain, + SCL_EASF_H_BF2_FLAT2_GAIN, scl_data->dscl_prog_data.easf_h_bf2_flat2_gain, SCL_EASF_H_BF2_ROC_GAIN, scl_data->dscl_prog_data.easf_h_bf2_roc_gain); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, - SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, + /* DSCL_EASF_H_RINGEST_EVENTAP_REDUCE */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_REDUCE, 0, + SCL_EASF_H_RINGEST_EVENTAP_REDUCEG1, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg1, SCL_EASF_H_RINGEST_EVENTAP_REDUCEG2, scl_data->dscl_prog_data.easf_h_ringest_eventap_reduceg2); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, - SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1); - REG_UPDATE(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, + /* DSCL_EASF_H_RINGEST_EVENTAP_GAIN */ + REG_SET_2(DSCL_EASF_H_RINGEST_EVENTAP_GAIN, 0, + SCL_EASF_H_RINGEST_EVENTAP_GAIN1, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain1, SCL_EASF_H_RINGEST_EVENTAP_GAIN2, scl_data->dscl_prog_data.easf_h_ringest_eventap_gain2); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXA, scl_data->dscl_prog_data.easf_h_bf_maxa); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, - SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina); - REG_UPDATE(DSCL_EASF_H_BF_FINAL_MAX_MIN, + /* DSCL_EASF_H_BF_FINAL_MAX_MIN */ + REG_SET_4(DSCL_EASF_H_BF_FINAL_MAX_MIN, 0, + SCL_EASF_H_BF_MAXA,
scl_data->dscl_prog_data.easf_h_bf_maxa, + SCL_EASF_H_BF_MAXB, scl_data->dscl_prog_data.easf_h_bf_maxb, + SCL_EASF_H_BF_MINA, scl_data->dscl_prog_data.easf_h_bf_mina, SCL_EASF_H_BF_MINB, scl_data->dscl_prog_data.easf_h_bf_minb); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, - SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG0, + /* DSCL_EASF_H_BF1_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG0, 0, + SCL_EASF_H_BF1_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg0, + SCL_EASF_H_BF1_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg0, SCL_EASF_H_BF1_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg0); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, - SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG1, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG1, 0, + SCL_EASF_H_BF1_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg1, + SCL_EASF_H_BF1_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg1, SCL_EASF_H_BF1_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg1); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, - SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG2, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG2, 0, + SCL_EASF_H_BF1_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg2, + SCL_EASF_H_BF1_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg2, SCL_EASF_H_BF1_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg2); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, - SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG3, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG3, 0, + SCL_EASF_H_BF1_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg3, + SCL_EASF_H_BF1_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg3, SCL_EASF_H_BF1_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg3); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, - SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG4, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG4, 0, + SCL_EASF_H_BF1_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg4, + SCL_EASF_H_BF1_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg4, SCL_EASF_H_BF1_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg4); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, - SCL_EASF_H_BF1_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG5, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG5, 0, + SCL_EASF_H_BF1_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg5, + SCL_EASF_H_BF1_PWL_BASE_SEG5, 
scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg5, SCL_EASF_H_BF1_PWL_SLOPE_SEG5, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg5); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, - SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG6, + REG_SET_3(DSCL_EASF_H_BF1_PWL_SEG6, 0, + SCL_EASF_H_BF1_PWL_IN_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg6, + SCL_EASF_H_BF1_PWL_BASE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg6, SCL_EASF_H_BF1_PWL_SLOPE_SEG6, scl_data->dscl_prog_data.easf_h_bf1_pwl_slope_seg6); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, - SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7); - REG_UPDATE(DSCL_EASF_H_BF1_PWL_SEG7, + REG_SET_2(DSCL_EASF_H_BF1_PWL_SEG7, 0, + SCL_EASF_H_BF1_PWL_IN_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_in_seg7, SCL_EASF_H_BF1_PWL_BASE_SEG7, scl_data->dscl_prog_data.easf_h_bf1_pwl_base_seg7); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, - SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG0, + /* DSCL_EASF_H_BF3_PWL_SEGn */ + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG0, 0, + SCL_EASF_H_BF3_PWL_IN_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set0, + SCL_EASF_H_BF3_PWL_BASE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set0, SCL_EASF_H_BF3_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set0); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, - SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG1, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG1, 0, + SCL_EASF_H_BF3_PWL_IN_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set1, + SCL_EASF_H_BF3_PWL_BASE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set1, SCL_EASF_H_BF3_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set1); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, - SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG2, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG2, 0, + SCL_EASF_H_BF3_PWL_IN_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set2, + SCL_EASF_H_BF3_PWL_BASE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set2, SCL_EASF_H_BF3_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set2); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, - SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG3, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG3, 0, + SCL_EASF_H_BF3_PWL_IN_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set3, + SCL_EASF_H_BF3_PWL_BASE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set3, SCL_EASF_H_BF3_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set3); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, - 
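The EASF rewrite above collapses runs of per-field REG_UPDATE read-modify-writes into single seeded REG_SET_n writes. A toy model of why that matters, counting register accesses with invented stand-in helpers (not the real DC register macros):

#include <stdio.h>

static unsigned int mmio;	/* stand-in for one DSCL register */
static int reads, writes;

static unsigned int reg_read(void) { reads++; return mmio; }
static void reg_write(unsigned int v) { writes++; mmio = v; }

/* REG_UPDATE-style: every field change is a read-modify-write */
static void reg_update(unsigned int mask, unsigned int v)
{
	reg_write((reg_read() & ~mask) | v);
}

int main(void)
{
	/* before: three fields, three read-modify-writes */
	reg_update(0x001, 0x001);
	reg_update(0x0f0, 0x030);
	reg_update(0x100, 0x100);
	printf("REG_UPDATE x3: %d reads, %d writes\n", reads, writes);

	/* after: one REG_SET_3-style write seeded from 0, no reads */
	reads = writes = 0;
	reg_write(0x131);
	printf("REG_SET_3:     %d reads, %d writes\n", reads, writes);
	return 0;
}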
SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG4, + REG_SET_3(DSCL_EASF_H_BF3_PWL_SEG4, 0, + SCL_EASF_H_BF3_PWL_IN_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set4, + SCL_EASF_H_BF3_PWL_BASE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set4, SCL_EASF_H_BF3_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.easf_h_bf3_pwl_slope_set4); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, - SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5); - REG_UPDATE(DSCL_EASF_H_BF3_PWL_SEG5, + REG_SET_2(DSCL_EASF_H_BF3_PWL_SEG5, 0, + SCL_EASF_H_BF3_PWL_IN_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_in_set5, SCL_EASF_H_BF3_PWL_BASE_SEG5, scl_data->dscl_prog_data.easf_h_bf3_pwl_base_set5); + PERF_TRACE(); +} +/** + * dpp401_dscl_program_easf - Program EASF + * + * @dpp_base: High level DPP struct + * @scl_data: scaler_data info + * + * This is the primary function to program EASF + * + */ +static void dpp401_dscl_program_easf(struct dpp *dpp_base, const struct scaler_data *scl_data) +{ + struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + + PERF_TRACE(); + /* DSCL_SC_MODE */ + REG_SET_2(DSCL_SC_MODE, 0, + SCL_SC_MATRIX_MODE, scl_data->dscl_prog_data.easf_matrix_mode, + SCL_SC_LTONL_EN, scl_data->dscl_prog_data.easf_ltonl_en); /* DSCL_EASF_SC_MATRIX_C0C1, DSCL_EASF_SC_MATRIX_C2C3 */ - REG_UPDATE(DSCL_SC_MATRIX_C0C1, - SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0); - REG_UPDATE(DSCL_SC_MATRIX_C0C1, + REG_SET_2(DSCL_SC_MATRIX_C0C1, 0, + SCL_SC_MATRIX_C0, scl_data->dscl_prog_data.easf_matrix_c0, SCL_SC_MATRIX_C1, scl_data->dscl_prog_data.easf_matrix_c1); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, - SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2); - REG_UPDATE(DSCL_SC_MATRIX_C2C3, + REG_SET_2(DSCL_SC_MATRIX_C2C3, 0, + SCL_SC_MATRIX_C2, scl_data->dscl_prog_data.easf_matrix_c2, SCL_SC_MATRIX_C3, scl_data->dscl_prog_data.easf_matrix_c3); + dpp401_dscl_program_easf_v(dpp_base, scl_data); + dpp401_dscl_program_easf_h(dpp_base, scl_data); PERF_TRACE(); } /** @@ -958,10 +937,11 @@ static void dpp401_dscl_set_isharp_filter( REG_UPDATE(ISHARP_DELTA_CTRL, ISHARP_DELTA_LUT_HOST_SELECT, 0); + /* LUT data write is auto-indexed.
Write index once */ + REG_SET(ISHARP_DELTA_INDEX, 0, + ISHARP_DELTA_INDEX, 0); for (level = 0; level < NUM_LEVELS; level++) { filter_data = filter[level]; - REG_SET(ISHARP_DELTA_INDEX, 0, - ISHARP_DELTA_INDEX, level); REG_SET(ISHARP_DELTA_DATA, 0, ISHARP_DELTA_DATA, filter_data); } @@ -971,112 +951,83 @@ * * @dpp_base: High level DPP struct * @scl_data: scaler_data info + * @program_isharp_1dlut: flag to program isharp 1D LUT + * @bs_coeffs_updated: Blur and Scale Coefficients update flag * * This is the primary function to program isharp * */ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, - const struct scaler_data *scl_data) + const struct scaler_data *scl_data, + bool program_isharp_1dlut, + bool *bs_coeffs_updated) { struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); + *bs_coeffs_updated = false; PERF_TRACE(); - /* ISHARP_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_EN, scl_data->dscl_prog_data.isharp_en); - /* ISHARP_NOISEDET_EN */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable); - /* ISHARP_NOISEDET_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - REG_UPDATE(ISHARP_MODE, - ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode); - /* ISHARP_NOISEDET_UTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, - ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold); - /* ISHARP_NOISEDET_DTHRE */ - REG_UPDATE(ISHARP_NOISEDET_THRESHOLD, + /* ISHARP_MODE */ + REG_SET_6(ISHARP_MODE, 0, + ISHARP_EN, scl_data->dscl_prog_data.isharp_en, + ISHARP_NOISEDET_EN, scl_data->dscl_prog_data.isharp_noise_det.enable, + ISHARP_NOISEDET_MODE, scl_data->dscl_prog_data.isharp_noise_det.mode, + ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode, + ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode, + ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); + + /* Skip remaining register programming if ISHARP is disabled */ + if (!scl_data->dscl_prog_data.isharp_en) { + PERF_TRACE(); + return; + } + + /* ISHARP_NOISEDET_THRESHOLD */ + REG_SET_2(ISHARP_NOISEDET_THRESHOLD, 0, + ISHARP_NOISEDET_UTHRE, scl_data->dscl_prog_data.isharp_noise_det.uthreshold, ISHARP_NOISEDET_DTHRE, scl_data->dscl_prog_data.isharp_noise_det.dthreshold); - /* ISHARP_NOISEDET_PWL_START_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in); - /* ISHARP_NOISEDET_PWL_END_IN */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, - ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in); - /* ISHARP_NOISEDET_PWL_SLOPE */ - REG_UPDATE(ISHARP_NOISE_GAIN_PWL, + + /* ISHARP_NOISE_GAIN_PWL */ + REG_SET_3(ISHARP_NOISE_GAIN_PWL, 0, + ISHARP_NOISEDET_PWL_START_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_start_in, + ISHARP_NOISEDET_PWL_END_IN, scl_data->dscl_prog_data.isharp_noise_det.pwl_end_in, ISHARP_NOISEDET_PWL_SLOPE, scl_data->dscl_prog_data.isharp_noise_det.pwl_slope); - /* ISHARP_LBA_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_LBA_MODE, scl_data->dscl_prog_data.isharp_lba.mode); + /* ISHARP_LBA: IN_SEG, BASE_SEG, SLOPE_SEG */ -
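The ISHARP_DELTA_INDEX change above relies on the LUT data port auto-incrementing its index after every data write, so the index register only needs to be written once before the loop. A toy model of that access pattern, with invented register stand-ins:

#include <stdio.h>

#define NUM_LEVELS 32

static unsigned int lut[NUM_LEVELS];
static unsigned int cursor;

/* one index write sets the cursor... */
static void write_index(unsigned int i) { cursor = i; }
/* ...and each data write stores a word and advances it */
static void write_data(unsigned int v) { lut[cursor++ % NUM_LEVELS] = v; }

int main(void)
{
	unsigned int level;

	write_index(0);
	for (level = 0; level < NUM_LEVELS; level++)
		write_data(level * 3);	/* data-only writes, no per-level index */

	printf("lut[5]=%u\n", lut[5]);
	return 0;
}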
REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, - ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG0, + REG_SET_3(ISHARP_LBA_PWL_SEG0, 0, + ISHARP_LBA_PWL_IN_SEG0, scl_data->dscl_prog_data.isharp_lba.in_seg[0], + ISHARP_LBA_PWL_BASE_SEG0, scl_data->dscl_prog_data.isharp_lba.base_seg[0], ISHARP_LBA_PWL_SLOPE_SEG0, scl_data->dscl_prog_data.isharp_lba.slope_seg[0]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, - ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG1, + REG_SET_3(ISHARP_LBA_PWL_SEG1, 0, + ISHARP_LBA_PWL_IN_SEG1, scl_data->dscl_prog_data.isharp_lba.in_seg[1], + ISHARP_LBA_PWL_BASE_SEG1, scl_data->dscl_prog_data.isharp_lba.base_seg[1], ISHARP_LBA_PWL_SLOPE_SEG1, scl_data->dscl_prog_data.isharp_lba.slope_seg[1]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, - ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG2, + REG_SET_3(ISHARP_LBA_PWL_SEG2, 0, + ISHARP_LBA_PWL_IN_SEG2, scl_data->dscl_prog_data.isharp_lba.in_seg[2], + ISHARP_LBA_PWL_BASE_SEG2, scl_data->dscl_prog_data.isharp_lba.base_seg[2], ISHARP_LBA_PWL_SLOPE_SEG2, scl_data->dscl_prog_data.isharp_lba.slope_seg[2]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, - ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG3, + REG_SET_3(ISHARP_LBA_PWL_SEG3, 0, + ISHARP_LBA_PWL_IN_SEG3, scl_data->dscl_prog_data.isharp_lba.in_seg[3], + ISHARP_LBA_PWL_BASE_SEG3, scl_data->dscl_prog_data.isharp_lba.base_seg[3], ISHARP_LBA_PWL_SLOPE_SEG3, scl_data->dscl_prog_data.isharp_lba.slope_seg[3]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, - ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG4, + REG_SET_3(ISHARP_LBA_PWL_SEG4, 0, + ISHARP_LBA_PWL_IN_SEG4, scl_data->dscl_prog_data.isharp_lba.in_seg[4], + ISHARP_LBA_PWL_BASE_SEG4, scl_data->dscl_prog_data.isharp_lba.base_seg[4], ISHARP_LBA_PWL_SLOPE_SEG4, scl_data->dscl_prog_data.isharp_lba.slope_seg[4]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, - ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5]); - REG_UPDATE(ISHARP_LBA_PWL_SEG5, + REG_SET_2(ISHARP_LBA_PWL_SEG5, 0, + ISHARP_LBA_PWL_IN_SEG5, scl_data->dscl_prog_data.isharp_lba.in_seg[5], ISHARP_LBA_PWL_BASE_SEG5, scl_data->dscl_prog_data.isharp_lba.base_seg[5]); - /* ISHARP_FMT_MODE */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_MODE, scl_data->dscl_prog_data.isharp_fmt.mode); - /* ISHARP_FMT_NORM */ - REG_UPDATE(ISHARP_MODE, - ISHARP_FMT_NORM, scl_data->dscl_prog_data.isharp_fmt.norm); /* ISHARP_DELTA_LUT */ - dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); - /* ISHARP_NLDELTA_SCLIP_EN_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p); - /* ISHARP_NLDELTA_SCLIP_PIVOT_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_P, 
scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p); - /* ISHARP_NLDELTA_SCLIP_SLOPE_P */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p); - /* ISHARP_NLDELTA_SCLIP_EN_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n); - /* ISHARP_NLDELTA_SCLIP_PIVOT_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, - ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n); - /* ISHARP_NLDELTA_SCLIP_SLOPE_N */ - REG_UPDATE(ISHARP_NLDELTA_SOFT_CLIP, + if (!program_isharp_1dlut) + dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); + + /* ISHARP_NLDELTA_SOFT_CLIP */ + REG_SET_6(ISHARP_NLDELTA_SOFT_CLIP, 0, + ISHARP_NLDELTA_SCLIP_EN_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_p, + ISHARP_NLDELTA_SCLIP_PIVOT_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_p, + ISHARP_NLDELTA_SCLIP_SLOPE_P, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_p, + ISHARP_NLDELTA_SCLIP_EN_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.enable_n, + ISHARP_NLDELTA_SCLIP_PIVOT_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.pivot_n, ISHARP_NLDELTA_SCLIP_SLOPE_N, scl_data->dscl_prog_data.isharp_nldelta_sclip.slope_n); /* Blur and Scale Coefficients - SCL_COEF_RAM_TAP_SELECT */ @@ -1086,12 +1037,14 @@ static void dpp401_dscl_program_isharp(struct dpp *dpp_base, dpp, scl_data->taps.v_taps, SCL_COEF_VERTICAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_v); + *bs_coeffs_updated = true; } if (scl_data->dscl_prog_data.filter_blur_scale_h) { dpp401_dscl_set_scaler_filter( dpp, scl_data->taps.h_taps, SCL_COEF_HORIZONTAL_BLUR_SCALE, scl_data->dscl_prog_data.filter_blur_scale_h); + *bs_coeffs_updated = true; } } PERF_TRACE(); @@ -1122,12 +1075,30 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; + bool program_isharp_1dlut = false; + bool bs_coeffs_updated = false; + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) return; PERF_TRACE(); + /* If only sharpness has changed, then only update 1dlut, then return */ + if (scl_data->dscl_prog_data.isharp_en && + (dpp->scl_data.dscl_prog_data.sharpness_level + != scl_data->dscl_prog_data.sharpness_level)) { + /* ISHARP_DELTA_LUT */ + dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); + dpp->scl_data.dscl_prog_data.sharpness_level = scl_data->dscl_prog_data.sharpness_level; + memcpy(dpp->scl_data.dscl_prog_data.isharp_delta, scl_data->dscl_prog_data.isharp_delta, + sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE); + + if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) + return; + program_isharp_1dlut = true; + } + dpp->scl_data = *scl_data; if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) { @@ -1181,7 +1152,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); - dpp401_dscl_program_isharp(dpp_base, scl_data); + dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); return; } @@ -1208,12 +1179,18 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, SCL_V_NUM_TAPS_C, v_num_taps_c, SCL_H_NUM_TAPS_C, h_num_taps_c); - 
dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr); + /* ISharp configuration + * - B&S coeffs are written to same coeff RAM as WB scaler coeffs + * - coeff RAM toggle is in EASF programming + * - if we are only programming B&S coeffs, then need to reprogram + * WB scaler coeffs and toggle coeff RAM together + */ + //if (dpp->base.ctx->dc->config.prefer_easf) + dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); + + dpp401_dscl_set_scl_filter(dpp, scl_data, ycbcr, bs_coeffs_updated); /* Edge adaptive scaler function configuration */ if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_program_easf(dpp_base, scl_data); - /* isharp configuration */ - //if (dpp->base.ctx->dc->config.prefer_easf) - dpp401_dscl_program_isharp(dpp_base, scl_data); PERF_TRACE(); } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index a1727e5bf0247..ebd5df1a36e8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -668,6 +668,7 @@ static bool decide_dsc_bandwidth_range( */ static bool decide_dsc_target_bpp_x16( const struct dc_dsc_policy *policy, + const struct dc_dsc_config_options *options, const struct dsc_enc_caps *dsc_common_caps, const int target_bandwidth_kbps, const struct dc_crtc_timing *timing, @@ -682,7 +683,7 @@ static bool decide_dsc_target_bpp_x16( if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16, num_slices_h, dsc_common_caps, timing, link_encoding, &range)) { if (target_bandwidth_kbps >= range.stream_kbps) { - if (policy->enable_dsc_when_not_needed) + if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed) /* enable max bpp even dsc is not needed */ *target_bpp_x16 = range.max_target_bpp_x16; } else if (target_bandwidth_kbps >= range.max_kbps) { @@ -882,7 +883,7 @@ static bool setup_dsc_config( memset(dsc_cfg, 0, sizeof(struct dc_dsc_config)); - dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy); + dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding); pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right; pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; @@ -1080,6 +1081,7 @@ static bool setup_dsc_config( if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( &policy, + options, &dsc_common_caps, target_bandwidth_kbps, timing, @@ -1171,7 +1173,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps( void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, - struct dc_dsc_policy *policy) + struct dc_dsc_policy *policy, + const enum dc_link_encoding_format link_encoding) { uint32_t bpc = 0; @@ -1235,10 +1238,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16; /* enable DSC when not needed, default false */ - if (dsc_policy_enable_dsc_when_not_needed) - policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; - else - policy->enable_dsc_when_not_needed = false; + policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed; } void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit) @@ -1267,4 +1267,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_ options->dsc_force_odm_hslice_override = 
dc->debug.force_odm_combine; options->max_target_bpp_limit_override_x16 = 0; options->slice_height_granularity = 1; + options->force_dsc_when_not_needed = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index 75128fd343067..ada393b613834 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -296,7 +296,9 @@ void dsc_log_pps(struct display_stream_compressor *dsc, struct drm_dsc_config *p DC_LOG_DSC("\tline_buf_depth %d", pps->line_buf_depth); DC_LOG_DSC("\tblock_pred_enable %d", pps->block_pred_enable); DC_LOG_DSC("\tconvert_rgb %d", pps->convert_rgb); +#ifdef HAVE_DRM_DSC_CONFIG_SIMPLE_422 DC_LOG_DSC("\tsimple_422 %d", pps->simple_422); +#endif DC_LOG_DSC("\tvbr_enable %d", pps->vbr_enable); DC_LOG_DSC("\tbits_per_pixel %d (%d.%04d)", bits_per_pixel, bits_per_pixel / 16, ((bits_per_pixel % 16) * 10000) / 16); DC_LOG_DSC("\tpic_height %d", pps->pic_height); @@ -433,7 +435,9 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values dsc_reg_vals->pps.convert_rgb = dsc_reg_vals->pixel_format == DSC_PIXFMT_RGB ? 1 : 0; dsc_reg_vals->pps.native_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR422); dsc_reg_vals->pps.native_420 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_NATIVE_YCBCR420); +#ifdef HAVE_DRM_DSC_CONFIG_SIMPLE_422 dsc_reg_vals->pps.simple_422 = (dsc_reg_vals->pixel_format == DSC_PIXFMT_SIMPLE_YCBCR422); +#endif calc_rc_params(&rc, &dsc_reg_vals->pps); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h index 1fb90b52b814b..cec8d03c96714 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h @@ -609,4 +609,3 @@ void dsc2_disconnect(struct display_stream_compressor *dsc); void dsc2_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); #endif - diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 6acb6699f146e..61678b0a5a1e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -27,7 +27,7 @@ static void dsc401_disconnect(struct display_stream_compressor *dsc); static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); -const struct dsc_funcs dcn401_dsc_funcs = { +static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c index 64cee8c80110c..25ea69bd2e820 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c @@ -47,9 +47,14 @@ void calc_rc_params(struct rc_params *rc, const struct drm_dsc_config *pps) int slice_width = pps->slice_width; int slice_height = pps->slice_height; +#ifdef HAVE_DRM_DSC_CONFIG_SIMPLE_422 mode = pps->convert_rgb ? CM_RGB : (pps->simple_422 ? CM_444 : (pps->native_422 ? CM_422 : pps->native_420 ? CM_420 : CM_444)); +#else + mode = pps->convert_rgb ? CM_RGB : (pps->native_422 ? CM_422 : + pps->native_420 ? CM_420 : CM_444); +#endif bpc = (pps->bits_per_component == 8) ? 
BPC_8 : (pps->bits_per_component == 10) ? BPC_10 : BPC_12; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c index 59864130cf83b..6f5ad09ad1404 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c @@ -33,7 +33,9 @@ static void copy_pps_fields(struct drm_dsc_config *to, const struct drm_dsc_conf to->convert_rgb = from->convert_rgb; to->slice_width = from->slice_width; to->slice_height = from->slice_height; +#ifdef HAVE_DRM_DSC_CONFIG_SIMPLE_422 to->simple_422 = from->simple_422; +#endif to->native_422 = from->native_422; to->native_420 = from->native_420; to->pic_width = from->pic_width; @@ -119,4 +121,3 @@ int dscc_compute_dsc_parameters(const struct drm_dsc_config *pps, dsc_params->rc_buffer_model_size = dsc_cfg.rc_bits; return ret; } - diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h index a1e2cde9c4cca..9fbd45c7dfef2 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h @@ -198,6 +198,9 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_A; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B; + uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1; + uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2; + uint32_t DCHUBBUB_CTRL_STATUS; }; #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ @@ -313,7 +316,17 @@ struct dcn_hubbub_registers { type DCN_VM_ERROR_VMID;\ type DCN_VM_ERROR_TABLE_LEVEL;\ type DCN_VM_ERROR_PIPE;\ - type DCN_VM_ERROR_INTERRUPT_STATUS + type DCN_VM_ERROR_INTERRUPT_STATUS;\ + type DCHUBBUB_TIMEOUT_ERROR_STATUS;\ + type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\ + type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\ + type DCHUBBUB_TIMEOUT_DETECTION_EN;\ + type DCHUBBUB_TIMEOUT_TIMER_RESET;\ + type ROB_UNDERFLOW_STATUS;\ + type ROB_OVERFLOW_STATUS;\ + type ROB_OVERFLOW_CLEAR;\ + type DCHUBBUB_HW_DEBUG;\ + type CSTATE_SWATH_CHK_GOOD_MODE #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h index 036bb3e6c9575..46d8f5c70750a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h @@ -96,6 +96,7 @@ struct dcn20_hubbub { unsigned int det1_size; unsigned int det2_size; unsigned int det3_size; + bool allow_sdpif_rate_limit_when_cstate_req; }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 6293173ba2b9d..5eb3da8d5206e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -545,6 +545,7 @@ static void hubbub35_init(struct hubbub *hubbub) DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256); + memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate)); } /*static void hubbub35_set_request_limit(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 37d26fa0b6fbb..92fab471b1836 100644 --- 
a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1192,6 +1192,37 @@ static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) } } +static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + bool wm_pending = false; + uint32_t temp; + + /* request backpressure and outstanding return threshold (unused)*/ + //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold); + + /* P-State stall threshold */ + REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold); + + if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req; + + /* only update the required bits */ + REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp); + if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + temp |= (1 << 5); + } else { + temp &= ~(1 << 5); + } + REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp); + } else { + wm_pending = true; + } + + return wm_pending; +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1215,6 +1246,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, + .program_arbiter = dcn401_program_arbiter, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h index f35f19ba3e18b..b1d9ea9d1c3d6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h @@ -123,8 +123,17 @@ HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\ HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\ HUBBUB_SF(DCHUBBUB_SDPIF_CFG1, SDPIF_MAX_NUM_OUTSTANDING, mask_sh),\ - HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh) - + HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_ERROR_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh) bool hubbub401_program_urgent_watermarks( struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 771fcd0d3b991..d1f05b82b3dd5 
100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -188,7 +188,7 @@ void hubp35_program_surface_config( hubp35_program_pixel_format(hubp, format); } -struct hubp_funcs dcn35_hubp_funcs = { +static struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 217344ccf6440..427fd6ea062a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -57,6 +57,7 @@ #include "panel_cntl.h" #include "dc_state_priv.h" #include "dpcd_defs.h" +#include "dsc.h" /* include DCE11 register header files */ #include "dce/dce_11_0_d.h" #include "dce/dce_11_0_sh_mask.h" @@ -1038,7 +1039,8 @@ void dce110_edp_backlight_control( link_transmitter_control(ctx->dc_bios, &cntl); if (enable && link->dpcd_sink_ext_caps.bits.oled && - !link->dc->config.edp_no_power_sequencing) { + !link->dc->config.edp_no_power_sequencing && + !link->local_sink->edid_caps.panel_patch.oled_optimize_display_on) { post_T7_delay += link->panel_config.pps.extra_post_t7_ms; msleep(post_T7_delay); } @@ -1232,20 +1234,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) { - /* - * Sometimes, DP receiver chip power-controlled externally by an - * Embedded Controller could be treated and used as eDP, - * if it drives mobile display. In this case, - * we shouldn't be doing power-sequencing, hence we can skip - * waiting for T9-ready. - */ - link->dc->link_srv->edp_receiver_ready_T9(link); - } } } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + !link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. 
+ */ + link->dc->link_srv->edp_receiver_ready_T9(link); + } + } @@ -1822,6 +1825,48 @@ static void get_edp_links_with_sink( } } +static void clean_up_dsc_blocks(struct dc *dc) +{ + struct display_stream_compressor *dsc = NULL; + struct timing_generator *tg = NULL; + struct stream_encoder *se = NULL; + struct dccg *dccg = dc->res_pool->dccg; + struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; + int i; + + if (dc->ctx->dce_version != DCN_VERSION_3_5 && + dc->ctx->dce_version != DCN_VERSION_3_51) + return; + + for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { + struct dcn_dsc_state s = {0}; + + dsc = dc->res_pool->dscs[i]; + dsc->funcs->dsc_read_state(dsc, &s); + if (s.dsc_fw_en) { + /* disable DSC in OPTC */ + if (i < dc->res_pool->timing_generator_count) { + tg = dc->res_pool->timing_generators[i]; + tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0); + } + /* disable DSC in stream encoder */ + if (i < dc->res_pool->stream_enc_count) { + se = dc->res_pool->stream_enc[i]; + se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0); + se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true); + } + /* disable DSC block */ + if (dccg->funcs->set_ref_dscclk) + dccg->funcs->set_ref_dscclk(dccg, dsc->inst); + dsc->funcs->dsc_disable(dsc); + + /* power down DSC */ + if (pg_cntl != NULL) + pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false); + } + } +} + /* * When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need: * 1. Power down all DC HW blocks @@ -1926,6 +1971,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); + + /* DSC could be enabled on eDP during VBIOS post. + * To clean up dsc blocks if eDP is in link but not active. + */ + if (edp_link_with_sink && (edp_stream_num == 0)) + clean_up_dsc_blocks(dc); + disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); @@ -2045,13 +2097,20 @@ static void set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); - - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. 
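+ * Without the local, every pipe_ctx[i]->stream_res.tg dereference re-reads
+ * the pointer and can observe NULL between the check and the call; the
+ * local keeps all accesses on a single snapshot.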
+ */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); + } } } @@ -2350,19 +2409,6 @@ static void dce110_setup_audio_dto( } } -static bool dce110_is_hpo_enabled(struct dc_state *context) -{ - int i; - - for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { - if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { - return true; - } - } - - return false; -} - enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context) @@ -2371,8 +2417,8 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc_bios *dcb = dc->ctx->dc_bios; enum dc_status status; int i; - bool was_hpo_enabled = dce110_is_hpo_enabled(dc->current_state); - bool is_hpo_enabled = dce110_is_hpo_enabled(context); + bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state); + bool is_hpo_acquired = resource_is_hpo_acquired(context); /* reset syncd pipes from disabled pipes */ if (dc->config.use_pipe_ctx_sync_logic) @@ -2415,8 +2461,8 @@ enum dc_status dce110_apply_ctx_to_hw( dce110_setup_audio_dto(dc, context); - if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_enabled != is_hpo_enabled) { - dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_enabled); + if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) { + dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired); } for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -3097,9 +3143,10 @@ static void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) } bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp) + struct set_backlight_level_params *params) { + uint32_t backlight_pwm_u16_16 = params->backlight_pwm_u16_16; + uint32_t frame_ramp = params->frame_ramp; struct dc_link *link = pipe_ctx->stream->link; struct dc *dc = link->ctx->dc; struct abm *abm = pipe_ctx->stream_res.abm; @@ -3270,7 +3317,7 @@ void dce110_disable_link_output(struct dc_link *link, * from enable/disable link output and only call edp panel control * in enable_link_dp and disable_link_dp once. 
*/ - if (dmcu != NULL && dmcu->funcs->lock_phy) + if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h index ed3cc3648e8e2..06789ac3a2245 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h @@ -88,8 +88,7 @@ void dce110_edp_wait_for_hpd_ready( bool power_up); bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *params); void dce110_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx); void dce110_set_pipe(struct pipe_ctx *pipe_ctx); void dce110_disable_link_output(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 01dffed4d30ba..a6a1db5ba8bad 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3212,15 +3212,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a80c085829320..1a32e53c1b22a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1732,7 +1732,6 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.scaler || plane_state->update_flags.bits.scaling_change || plane_state->update_flags.bits.position_change || - plane_state->update_flags.bits.clip_size_change || plane_state->update_flags.bits.per_pixel_alpha_change || pipe_ctx->stream->update_flags.bits.scaling) { pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->plane_state->per_pixel_alpha; @@ -1745,7 +1744,6 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.viewport || (context == dc->current_state && plane_state->update_flags.bits.position_change) || (context == dc->current_state && plane_state->update_flags.bits.scaling_change) || - (context == dc->current_state && plane_state->update_flags.bits.clip_size_change) || (context == dc->current_state && pipe_ctx->stream->update_flags.bits.scaling)) { hubp->funcs->mem_program_viewport( @@ -2056,22 +2054,15 @@ void dcn20_program_front_end_for_ctx( 
*/ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream; + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream && - dc_state_get_pipe_subvp_type(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM) { + dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; if (tg->funcs->enable_crtc) { - if (dc->hwss.blank_phantom) { - int main_pipe_width = 0, main_pipe_height = 0; - struct dc_stream_state *phantom_stream = dc_state_get_paired_subvp_stream(dc->current_state, dc->current_state->res_ctx.pipe_ctx[i].stream); - - if (phantom_stream) { - main_pipe_width = phantom_stream->dst.width; - main_pipe_height = phantom_stream->dst.height; - } - - dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); + if (dc->hwseq->funcs.blank_pixel_data) { + dc->hwseq->funcs.blank_pixel_data(dc, pipe, true); } tg->funcs->enable_crtc(tg); } @@ -2255,9 +2246,9 @@ void dcn20_post_unlock_program_front_end( struct timing_generator *tg = pipe->stream_res.tg; - if (tg->funcs->get_double_buffer_pending) { + if (tg->funcs->get_optc_double_buffer_pending) { for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us - && tg->funcs->get_double_buffer_pending(tg); j++) + && tg->funcs->get_optc_double_buffer_pending(tg); j++) udelay(polling_interval_us); } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 1ea95f8d4cbcc..630e05f32c806 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -242,14 +242,15 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) } bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp) + struct set_backlight_level_params *params) { struct dc_context *dc = pipe_ctx->stream->ctx; struct abm *abm = pipe_ctx->stream_res.abm; struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; uint32_t otg_inst; + uint32_t backlight_pwm_u16_16 = params->backlight_pwm_u16_16; + uint32_t frame_ramp = params->frame_ramp; if (!abm || !tg || !panel_cntl) return false; @@ -257,7 +258,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, otg_inst = tg->inst; if (dc->dc->res_pool->dmcu) { - dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp); + dce110_set_backlight_level(pipe_ctx, params); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h index 9cee9bdb8de95..a7eaaa4596be4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h @@ -50,8 +50,7 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, void dcn21_set_pipe(struct pipe_ctx *pipe_ctx); void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx); bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *params); bool dcn21_is_abm_supported(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 42c52284a8680..bf9b5daea2af2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -455,7 +455,7 @@ bool dcn30_mmhubbub_warmup( struct mcif_wb *mcif_wb; struct mcif_warmup_params warmup_params = {0}; unsigned int i, i_buf; - /*make sure there is no active DWB eanbled */ + /* make sure there is no active DWB enabled */ for (i = 0; i < num_dwb; i++) { dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst]; if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) { @@ -1185,3 +1185,30 @@ void dcn30_prepare_bandwidth(struct dc *dc, if (!dc->clk_mgr->clks.fw_based_mclk_switching) dc_dmub_srv_p_state_delegate(dc, false, context); } + +void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx) +{ + struct timing_generator *tg = pipe_ctx->stream_res.tg; + bool pending_updates = false; + unsigned int i; + + if (tg && tg->funcs->is_tg_enabled(tg)) { + // Poll for 100ms maximum + for (i = 0; i < 100000; i++) { + pending_updates = false; + if (tg->funcs->get_optc_double_buffer_pending) + pending_updates |= tg->funcs->get_optc_double_buffer_pending(tg); + + if (tg->funcs->get_otg_double_buffer_pending) + pending_updates |= tg->funcs->get_otg_double_buffer_pending(tg); + + if (tg->funcs->get_pipe_update_pending && pipe_ctx->plane_state) + pending_updates |= tg->funcs->get_pipe_update_pending(tg); + + if (!pending_updates) + break; + + udelay(1); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h index 6a153e7ce910e..4b90b781c4f2d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h @@ -96,4 +96,6 @@ void dcn30_set_hubp_blank(const struct dc *dc, void dcn30_prepare_bandwidth(struct dc *dc, struct dc_state *context); +void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 2a8dc40d28477..0e8d32e3dbae1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -108,7 +108,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, .get_dcc_en_bits = dcn10_get_dcc_en_bits, .update_visual_confirm_color = dcn10_update_visual_confirm_color, - .is_abm_supported = dcn21_is_abm_supported + .is_abm_supported = dcn21_is_abm_supported, + .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, }; static const struct hwseq_private_funcs dcn30_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 93e49d87a67ce..780ce4c064aa5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -107,6 +107,7 @@ static const struct hw_sequencer_funcs dcn301_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, }; static const struct hwseq_private_funcs dcn301_private_funcs = { diff --git 
a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 3d4b31bd99469..0f746f12b385e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -517,10 +517,18 @@ static void dcn31_reset_back_end_for_pipe( dc->hwss.set_abm_immediate_disable(pipe_ctx); + link = pipe_ctx->stream->link; + + if ((!pipe_ctx->stream->dpms_off || link->link_status.link_active) && + (link->connector_signal == SIGNAL_TYPE_EDP)) + dc->hwss.blank_stream(pipe_ctx); + pipe_ctx->stream_res.tg->funcs->set_dsc_config( pipe_ctx->stream_res.tg, OPTC_DSC_DISABLED, 0, 0); + pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); + pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false); if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass) pipe_ctx->stream_res.tg->funcs->set_odm_bypass( @@ -532,7 +540,6 @@ static void dcn31_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->set_drr( pipe_ctx->stream_res.tg, NULL); - link = pipe_ctx->stream->link; /* DPMS may already disable or */ /* dpms_off status is incorrect due to fastboot * feature. When system resume from S4 with second diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index b57dd45611f23..56f3c70d4b554 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -111,6 +111,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn31_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 4e93eeedfc1bb..9b88eb72086db 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -355,6 +355,20 @@ void dcn314_calculate_pix_rate_divider( } } +static bool dcn314_is_pipe_dig_fifo_on(struct pipe_ctx *pipe) +{ + return pipe && pipe->stream + // Check dig's otg instance. 
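+ // i.e. this pipe's OTG is the encoder's DIG source and both the DIG
+ // and its output FIFO are enabled, so the OTG is feeding a live
+ // display path and must not be disabled by the resync below.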
+ && pipe->stream_res.stream_enc + && pipe->stream_res.stream_enc->funcs->dig_source_otg + && pipe->stream_res.tg->inst == pipe->stream_res.stream_enc->funcs->dig_source_otg(pipe->stream_res.stream_enc) + && pipe->stream->link && pipe->stream->link->link_enc + && pipe->stream->link->link_enc->funcs->is_dig_enabled + && pipe->stream->link->link_enc->funcs->is_dig_enabled(pipe->stream->link->link_enc) + && pipe->stream_res.stream_enc->funcs->is_fifo_enabled + && pipe->stream_res.stream_enc->funcs->is_fifo_enabled(pipe->stream_res.stream_enc); +} + void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; @@ -371,7 +385,11 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) && + !pipe->stream->apply_seamless_boot_optimization && + !pipe->stream->apply_edp_fast_boot_optimization) { + if (dcn314_is_pipe_dig_fifo_on(pipe)) + continue; pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -478,7 +496,7 @@ void dcn314_disable_link_output(struct dc_link *link, * from enable/disable link output and only call edp panel control * in enable_link_dp and disable_link_dp once. */ - if (dmcu != NULL && dmcu->funcs->lock_phy) + if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index fe5495a8e7a2b..68e6de6b5758d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -114,6 +114,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn314_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a36e11606f90e..5e2edba93a199 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -129,7 +129,6 @@ void dcn32_dsc_pg_control( REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } - void dcn32_enable_power_gating_plane( struct dce_hwseq *hws, bool enable) @@ -1032,6 +1031,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + 
stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; @@ -1384,10 +1397,10 @@ void dcn32_disable_link_output(struct dc_link *link, link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_backlight_control && + link->dc->hwss.edp_power_control && !link->skip_implict_edp_power_control) link->dc->hwss.edp_power_control(link, false); - else if (dmcu != NULL && dmcu->funcs->lock_phy) + else if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); @@ -1684,52 +1697,6 @@ void dcn32_init_blank( hws->funcs.wait_for_blank_complete(opp); } -void dcn32_blank_phantom(struct dc *dc, - struct timing_generator *tg, - int width, - int height) -{ - struct dce_hwseq *hws = dc->hwseq; - enum dc_color_space color_space; - struct tg_color black_color = {0}; - struct output_pixel_processor *opp = NULL; - uint32_t num_opps, opp_id_src0, opp_id_src1; - uint32_t otg_active_width, otg_active_height; - uint32_t i; - - /* program opp dpg blank color */ - color_space = COLOR_SPACE_SRGB; - color_space_to_black_color(dc, color_space, &black_color); - - otg_active_width = width; - otg_active_height = height; - - /* get the OPTC source */ - tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); - ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp); - - for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) { - if (dc->res_pool->opps[i] != NULL && dc->res_pool->opps[i]->inst == opp_id_src0) { - opp = dc->res_pool->opps[i]; - break; - } - } - - if (opp && opp->funcs->opp_set_disp_pattern_generator) - opp->funcs->opp_set_disp_pattern_generator( - opp, - CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR, - CONTROLLER_DP_COLOR_SPACE_UDEFINED, - COLOR_DEPTH_UNDEFINED, - &black_color, - otg_active_width, - otg_active_height, - 0); - - if (tg->funcs->is_tg_enabled(tg)) - hws->funcs.wait_for_blank_complete(opp); -} - /* phantom stream id's can change often, but can be identical between contexts. * This function checks for the condition the streams are identical to avoid * redundant pipe transitions. 
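The dcn32_update_dsc_on_stream() hunk above, like its dcn35 counterpart further down, adds a read-before-write guard: read the DSC block's live state and return early instead of reprogramming a block that is missing or that has already been disabled. A minimal standalone sketch of that guard pattern, with stand-in types in place of the driver's real DSC structures (the names below are illustrative, not the driver's API):

    #include <stdbool.h>

    struct display_stream_compressor;

    /* Stand-in for the state the driver reads back from the DSC block. */
    struct dcn_dsc_state {
            bool dsc_fw_en; /* enable bit as currently latched in hardware */
    };

    /* Stand-in function table; the real driver has many more hooks. */
    struct dsc_funcs {
            void (*dsc_read_state)(struct display_stream_compressor *dsc,
                                   struct dcn_dsc_state *state);
    };

    struct display_stream_compressor {
            const struct dsc_funcs *funcs;
    };

    /* Guard pattern from the hunks above: confirm the DSC block exists and
     * is still enabled in hardware before touching any of its registers. */
    static bool dsc_reprogram_allowed(struct display_stream_compressor *dsc)
    {
            struct dcn_dsc_state s = {0};

            if (!dsc)
                    return false; /* no DSC bound to this timing generator */

            if (dsc->funcs->dsc_read_state) {
                    dsc->funcs->dsc_read_state(dsc, &s);
                    if (!s.dsc_fw_en)
                            return false; /* already off; skip programming */
            }
            return true;
    }

Bailing out early keeps the update path idempotent across repeated calls: the driver logs the condition and leaves the OPTC and DSC registers untouched.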
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index cac4a08b92a4d..0303a59536737 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -119,11 +119,6 @@ void dcn32_init_blank( struct dc *dc, struct timing_generator *tg); -void dcn32_blank_phantom(struct dc *dc, - struct timing_generator *tg, - int width, - int height); - bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 3422b564ae984..dbcd2dfb19c12 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -117,10 +117,10 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .update_phantom_vp_position = dcn32_update_phantom_vp_position, .update_dsc_pg = dcn32_update_dsc_pg, .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, - .blank_phantom = dcn32_blank_phantom, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .program_outstanding_updates = dcn32_program_outstanding_updates, + .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 899e239352aa0..bd309dbdf7b2a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -147,37 +147,6 @@ void dcn35_init_hw(struct dc *dc) hws->funcs.bios_golden_init(dc); } - if (!dc->debug.disable_clock_gate) { - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - REG_WRITE(DCCG_GATE_DISABLE_CNTL2, 0); - - /* Disable gating for PHYASYMCLK. 
This will be enabled in dccg if needed */ - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, PHYASYMCLK_ROOT_GATE_DISABLE, 1, - PHYBSYMCLK_ROOT_GATE_DISABLE, 1, - PHYCSYMCLK_ROOT_GATE_DISABLE, 1, - PHYDSYMCLK_ROOT_GATE_DISABLE, 1, - PHYESYMCLK_ROOT_GATE_DISABLE, 1); - - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL4, - DPIASYMCLK0_GATE_DISABLE, 0, - DPIASYMCLK1_GATE_DISABLE, 0, - DPIASYMCLK2_GATE_DISABLE, 0, - DPIASYMCLK3_GATE_DISABLE, 0); - - REG_WRITE(DCCG_GATE_DISABLE_CNTL5, 0xFFFFFFFF); - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, - DTBCLK_P0_GATE_DISABLE, 0, - DTBCLK_P1_GATE_DISABLE, 0, - DTBCLK_P2_GATE_DISABLE, 0, - DTBCLK_P3_GATE_DISABLE, 0); - REG_UPDATE_4(DCCG_GATE_DISABLE_CNTL5, - DPSTREAMCLK0_GATE_DISABLE, 0, - DPSTREAMCLK1_GATE_DISABLE, 0, - DPSTREAMCLK2_GATE_DISABLE, 0, - DPSTREAMCLK3_GATE_DISABLE, 0); - - } - // Initialize the dccg if (res_pool->dccg->funcs->dccg_init) res_pool->dccg->funcs->dccg_init(res_pool->dccg); @@ -271,6 +240,10 @@ void dcn35_init_hw(struct dc *dc) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } + if (res_pool->dccg->funcs->dccg_root_gate_disable_control) { + for (i = 0; i < res_pool->pipe_count; i++) + res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0); + } for (i = 0; i < res_pool->audio_count; i++) { struct audio *audio = res_pool->audios[i]; @@ -305,20 +278,6 @@ void dcn35_init_hw(struct dc *dc) if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ - REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0); - - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_FE_GATE_DISABLE, 0, - SYMCLKB_FE_GATE_DISABLE, 0, - SYMCLKC_FE_GATE_DISABLE, 0, - SYMCLKD_FE_GATE_DISABLE, 0, - SYMCLKE_FE_GATE_DISABLE, 0); - REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, HDMICHARCLK0_GATE_DISABLE, 0); - REG_UPDATE_5(DCCG_GATE_DISABLE_CNTL2, SYMCLKA_GATE_DISABLE, 0, - SYMCLKB_GATE_DISABLE, 0, - SYMCLKC_GATE_DISABLE, 0, - SYMCLKD_GATE_DISABLE, 0, - SYMCLKE_GATE_DISABLE, 0); - REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } @@ -375,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct dsc_config dsc_cfg; struct dsc_optc_config dsc_optc_cfg = {0}; enum optc_dsc_mode optc_dsc_mode; + struct dcn_dsc_state dsc_state = {0}; + + if (!dsc) { + DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + if (dsc->funcs->dsc_read_state) { + dsc->funcs->dsc_read_state(dsc, &dsc_state); + if (!dsc_state.dsc_fw_en) { + DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst); + return; + } + } /* Enable DSC hw block */ dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; @@ -1024,9 +996,6 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired) update_state->pg_res_update[PG_HPO] = true; - if (hpo_frl_stream_enc_acquired) - update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; - update_state->pg_res_update[PG_DWB] = true; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -1462,7 +1431,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + /* dc_state_destruct() might 
null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; struct dc *dc = pipe_ctx[i]->stream->ctx->dc; @@ -1474,14 +1449,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 2bbf1fef94fd2..55dc5799e725a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -123,7 +123,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, - .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn35_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 5da3069fc1aba..a93864b63d48f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -122,7 +122,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, - .program_outstanding_updates = dcn32_program_outstanding_updates, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn351_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 0b743669f23b4..721eb77bf1685 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -844,6 +844,13 @@ enum dc_status dcn401_enable_stream_timing( odm_slice_width, last_odm_slice_width); } + /* set DTBCLK_P */ + if (dc->res_pool->dccg->funcs->set_dtbclk_p_src) { + if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) { + dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, DPREFCLK, pipe_ctx->stream_res.tg->inst); + } + } + /* HW program guide assume display already disable * by unplug sequence. OTG assume stop. 
*/ @@ -1004,8 +1011,6 @@ void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); } else { - /* need to set DTBCLK_P source to DPREFCLK for DP8B10B */ - dccg->funcs->set_dtbclk_p_src(dccg, DPREFCLK, tg->inst); dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, link_enc->transmitter - TRANSMITTER_UNIPHY_A); } @@ -1097,6 +1102,58 @@ void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct } } +static void disable_link_output_symclk_on_tx_off(struct dc_link *link, enum dp_link_encoding link_encoding) +{ + struct dc *dc = link->ctx->dc; + struct pipe_ctx *pipe_ctx = NULL; + uint8_t i; + + for (i = 0; i < MAX_PIPES; i++) { + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx->stream && pipe_ctx->stream->link == link && pipe_ctx->top_pipe == NULL) { + pipe_ctx->clock_source->funcs->program_pix_clk( + pipe_ctx->clock_source, + &pipe_ctx->stream_res.pix_clk_params, + link_encoding, + &pipe_ctx->pll_settings); + break; + } + } +} + +void dcn401_disable_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal) +{ + struct dc *dc = link->ctx->dc; + const struct link_hwss *link_hwss = get_link_hwss(link, link_res); + struct dmcu *dmcu = dc->res_pool->dmcu; + + if (signal == SIGNAL_TYPE_EDP && + link->dc->hwss.edp_backlight_control && + !link->skip_implict_edp_power_control) + link->dc->hwss.edp_backlight_control(link, false); + else if (dmcu != NULL && dmcu->funcs->lock_phy) + dmcu->funcs->lock_phy(dmcu); + + if (dc_is_tmds_signal(signal) && link->phy_state.symclk_ref_cnts.otg > 0) { + disable_link_output_symclk_on_tx_off(link, DP_UNKNOWN_ENCODING); + link->phy_state.symclk_state = SYMCLK_ON_TX_OFF; + } else { + link_hwss->disable_link_output(link, link_res, signal); + link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; + } + + if (signal == SIGNAL_TYPE_EDP && + link->dc->hwss.edp_backlight_control && + !link->skip_implict_edp_power_control) + link->dc->hwss.edp_power_control(link, false); + else if (dmcu != NULL && dmcu->funcs->lock_phy) + dmcu->funcs->unlock_phy(dmcu); + + dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY); +} + void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) { struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; @@ -1426,6 +1483,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { @@ -1467,6 +1528,10 @@ void dcn401_optimize_bandwidth( &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true); + } if (dc->clk_mgr->dc_mode_softmax_enabled) if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && @@ -1669,7 +1734,7 @@ void dcn401_hardware_release(struct dc *dc) } } -void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master) +void dcn401_wait_for_det_buffer_update_under_otg_master(struct dc *dc, struct dc_state *context, struct pipe_ctx 
*otg_master) { struct pipe_ctx *opp_heads[MAX_PIPES]; struct pipe_ctx *dpp_pipes[MAX_PIPES]; @@ -1695,6 +1760,9 @@ void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, hubbub->funcs->wait_for_det_update) hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst); } + } else { + if (hubbub && opp_heads[slice_idx]->plane_res.hubp && hubbub->funcs->wait_for_det_update) + hubbub->funcs->wait_for_det_update(hubbub, opp_heads[slice_idx]->plane_res.hubp->inst); } } } @@ -1705,7 +1773,6 @@ void dcn401_interdependent_update_lock(struct dc *dc, unsigned int i = 0; struct pipe_ctx *pipe = NULL; struct timing_generator *tg = NULL; - bool pipe_unlocked[MAX_PIPES] = {0}; if (lock) { for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -1719,43 +1786,38 @@ void dcn401_interdependent_update_lock(struct dc *dc, dc->hwss.pipe_control_lock(dc, pipe, true); } } else { - /* Unlock pipes based on the change in DET allocation instead of pipe index - * Prevents over allocation of DET during unlock process - * e.g. 2 pipe config with different streams with a max of 20 DET segments - * Before: After: - * - Pipe0: 10 DET segments - Pipe0: 12 DET segments - * - Pipe1: 10 DET segments - Pipe1: 8 DET segments - * If Pipe0 gets updated first, 22 DET segments will be allocated - */ + /* Need to free DET being used first and have pipe update, then unlock the remaining pipes*/ for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; tg = pipe->stream_res.tg; - int current_pipe_idx = i; if (!resource_is_pipe_type(pipe, OTG_MASTER) || !tg->funcs->is_tg_enabled(tg) || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { - pipe_unlocked[i] = true; continue; } - // If the same stream exists in old context, ensure the OTG_MASTER pipes for the same stream get compared - struct pipe_ctx *old_otg_master = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, pipe->stream); - - if (old_otg_master) - current_pipe_idx = old_otg_master->pipe_idx; - if (resource_calculate_det_for_stream(context, pipe) < - resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[current_pipe_idx])) { + if (dc->scratch.pipes_to_unlock_first[i]) { + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; dc->hwss.pipe_control_lock(dc, pipe, false); - pipe_unlocked[i] = true; - dcn401_wait_for_det_buffer_update(dc, context, pipe); + /* Assumes pipe of the same index in current_state is also an OTG_MASTER pipe*/ + dcn401_wait_for_det_buffer_update_under_otg_master(dc, dc->current_state, old_pipe); } } + /* Unlocking the rest of the pipes */ for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (pipe_unlocked[i]) + if (dc->scratch.pipes_to_unlock_first[i]) continue; + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + continue; + } + dc->hwss.pipe_control_lock(dc, pipe, false); } } @@ -1770,3 +1832,129 @@ void dcn401_program_outstanding_updates(struct dc *dc, if (hubbub->funcs->program_compbuf_segments) hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); } + +void dcn401_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context) +{ + int i; + struct dc_link *link = pipe_ctx->stream->link; + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + + 
DC_LOGGER_INIT(dc->ctx->logger); + if (pipe_ctx->stream_res.stream_enc == NULL) { + pipe_ctx->stream = NULL; + return; + } + + /* The stream may already be disabled by DPMS, or the dpms_off status + * may be incorrect due to the fastboot feature: when the system + * resumes from S4 with the second screen only, dpms_off would be true + * even though the VBIOS lit up eDP, so check the link status too. + */ + if (!pipe_ctx->stream->dpms_off || link->link_status.link_active) + dc->link_srv->set_dpms_off(pipe_ctx); + else if (pipe_ctx->stream_res.audio) + dc->hwss.disable_audio_stream(pipe_ctx); + + /* free acquired resources */ + if (pipe_ctx->stream_res.audio) { + /* disable az_endpoint */ + pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); + + /* free audio */ + if (dc->caps.dynamic_audio == true) { + /* audio endpoints are arbitrated dynamically; free the + * resource and reset is_audio_acquired + */ + update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, + pipe_ctx->stream_res.audio, false); + pipe_ctx->stream_res.audio = NULL; + } + } + + /* By the caller's loop order, the parent pipe (pipe 0) is reset last. + * The back end is shared by all pipes and is disabled only when the + * parent pipe is disabled. + */ + if (pipe_ctx->top_pipe == NULL) { + + dc->hwss.set_abm_immediate_disable(pipe_ctx); + + pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); + + pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false); + if (pipe_ctx->stream_res.tg->funcs->set_odm_bypass) + pipe_ctx->stream_res.tg->funcs->set_odm_bypass( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + + if (pipe_ctx->stream_res.tg->funcs->set_drr) + pipe_ctx->stream_res.tg->funcs->set_drr( + pipe_ctx->stream_res.tg, NULL); + /* TODO - convert symclk_ref_cnts for otg to a bit map to solve + * the case where the same symclk is shared across multiple otg + * instances + */ + if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) + link->phy_state.symclk_ref_cnts.otg = 0; + if (link->phy_state.symclk_state == SYMCLK_ON_TX_OFF) { + link_hwss->disable_link_output(link, + &pipe_ctx->link_res, pipe_ctx->stream->signal); + link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; + } + + /* reset DTBCLK_P */ + if (dc->res_pool->dccg->funcs->set_dtbclk_p_src) + dc->res_pool->dccg->funcs->set_dtbclk_p_src(dc->res_pool->dccg, REFCLK, pipe_ctx->stream_res.tg->inst); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) + if (&dc->current_state->res_ctx.pipe_ctx[i] == pipe_ctx) + break; + + if (i == dc->res_pool->pipe_count) + return; + +/* + * In case of a dangling plane, setting this to NULL unconditionally + * causes failures during reset hw ctx where, if stream is NULL, + * it is expected that the pipe_ctx pointers to pipes and plane are NULL.
+ */ + pipe_ctx->stream = NULL; + DC_LOG_DEBUG("Reset back end for pipe %d, tg:%d\n", + pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst); +} + +void dcn401_reset_hw_ctx_wrap( + struct dc *dc, + struct dc_state *context) +{ + int i; + struct dce_hwseq *hws = dc->hwseq; + + /* Reset Back End*/ + for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) { + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + struct clock_source *old_clk = pipe_ctx_old->clock_source; + + if (hws->funcs.reset_back_end_for_pipe) + hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (hws->funcs.enable_stream_gating) + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); + if (old_clk) + old_clk->funcs->cs_power_down(old_clk); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index a27e62081685d..66d679080c449 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -55,6 +55,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, bool lut_bank_a); void dcn401_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable); +void dcn401_disable_link_output(struct dc_link *link, + const struct link_resource *link_res, + enum signal_type signal); + void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx); bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable); @@ -81,7 +85,14 @@ void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); -void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_wait_for_det_buffer_update_under_otg_master(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); +void dcn401_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); +void dcn401_reset_hw_ctx_wrap( + struct dc *dc, + struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index a2ca07235c83d..a1392e776709d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -84,7 +84,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_lvds_link_output = dce110_enable_lvds_link_output, .enable_tmds_link_output = dce110_enable_tmds_link_output, .enable_dp_link_output = dce110_enable_dp_link_output, - .disable_link_output = dcn32_disable_link_output, + .disable_link_output = dcn401_disable_link_output, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, .get_dcc_en_bits = dcn10_get_dcc_en_bits, .enable_phantom_streams = dcn32_enable_phantom_streams, @@ -93,13 +93,13 @@ static const struct hw_sequencer_funcs dcn401_funcs = { 
.update_phantom_vp_position = dcn32_update_phantom_vp_position, .update_dsc_pg = dcn32_update_dsc_pg, .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, - .blank_phantom = dcn32_blank_phantom, .wait_for_dcc_meta_propagation = dcn401_wait_for_dcc_meta_propagation, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, .program_outstanding_updates = dcn401_program_outstanding_updates, + .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, }; static const struct hwseq_private_funcs dcn401_private_funcs = { @@ -111,7 +111,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .power_down = dce110_power_down, .enable_display_power_gating = dcn10_dummy_display_power_gating, .blank_pixel_data = dcn20_blank_pixel_data, - .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, + .reset_hw_ctx_wrap = dcn401_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, @@ -136,7 +136,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, - .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, + .reset_back_end_for_pipe = dcn401_reset_back_end_for_pipe, .populate_mcm_luts = NULL, }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 3268544898026..1df17c54f3a9f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -174,6 +174,11 @@ union block_sequence_params { struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params; }; +struct set_backlight_level_params { + uint32_t backlight_pwm_u16_16; + uint32_t frame_ramp; +}; + enum block_sequence_func { DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST = 0, OPTC_PIPE_CONTROL_LOCK, @@ -365,8 +370,7 @@ struct hw_sequencer_funcs { void (*clear_status_bits)(struct dc *dc, unsigned int mask); bool (*set_backlight_level)(struct pipe_ctx *pipe_ctx, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *params); void (*set_abm_immediate_disable)(struct pipe_ctx *pipe_ctx); @@ -461,6 +465,8 @@ struct hw_sequencer_funcs { void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); void (*program_outstanding_updates)(struct dc *dc, struct dc_state *context); + void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); + void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx); }; void color_space_to_black_color( @@ -503,6 +509,10 @@ void get_mclk_switch_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); +void get_cursor_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color); + void set_p_state_switch_method( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index bfb8b8502d202..7de2dc933a098 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -201,7 
+201,6 @@ struct resource_funcs { const struct resource_pool *pool, struct dc_3dlut **lut, struct dc_transfer_func **shaper); - enum dc_status (*add_dsc_to_stream_resource)( struct dc *dc, struct dc_state *state, struct dc_stream_state *stream); @@ -215,6 +214,10 @@ struct resource_funcs { void (*get_panel_config_defaults)(struct dc_panel_config *panel_config); void (*build_pipe_pix_clk_params)(struct pipe_ctx *pipe_ctx); + /* + * Get indicator of power from a context that went through full validation + */ + int (*get_power_profile)(const struct dc_state *context); }; struct audio_support{ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index d619eb229a62a..e94e9ba60f55a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -213,6 +213,7 @@ struct dccg_funcs { uint32_t otg_inst); void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); + void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating); }; #endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 67c32401893e8..52b745667ef75 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -228,6 +228,7 @@ struct hubbub_funcs { void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); + bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 3d4c8bd42b492..4e08e80eafe8e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -342,7 +342,9 @@ struct timing_generator_funcs { void (*wait_drr_doublebuffer_pending_clear)(struct timing_generator *tg); void (*set_long_vtotal)(struct timing_generator *optc, const struct long_vtotal_params *params); void (*wait_odm_doublebuffer_pending_clear)(struct timing_generator *tg); - bool (*get_double_buffer_pending)(struct timing_generator *tg); + bool (*get_optc_double_buffer_pending)(struct timing_generator *tg); + bool (*get_otg_double_buffer_pending)(struct timing_generator *tg); + bool (*get_pipe_update_pending)(struct timing_generator *tg); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index 72a8479e1f2d7..dadedc2ccdcba 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -164,7 +164,6 @@ struct link_service { bool (*set_dsc_enable)(struct pipe_ctx *pipe_ctx, bool enable); bool (*update_dsc_config)(struct pipe_ctx *pipe_ctx); - /*************************** DDC **************************************/ struct ddc_service *(*create_ddc_service)( struct ddc_service_init_data *ddc_init_data); @@ -223,9 +222,9 @@ struct link_service { const struct link_resource *link_res, struct link_training_settings *lt_settings); void (*dpcd_write_rx_power_ctrl)(struct dc_link *link, bool on); + - - /*************************** DP IRQ Handler 
***************************/ + /*************************** DP IRQ Handler ***************************/ bool (*dp_parse_link_loss_status)( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 9cd80d3864c7b..cd1157d225abe 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -644,4 +644,6 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe */ int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); + +bool resource_is_hpo_acquired(struct dc_state *context); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index df3781081da7a..ff8fe1a94965b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -67,6 +67,8 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); + bool is_hpo_acquired; uint8_t count; int i; @@ -83,6 +85,12 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]); } + if (link->dc->hwss.setup_hpo_hw_control) { + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + } + for (i = count-1; i >= 0; i--) link_set_dpms_on(state, pipes[i]); } diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c index 46fb3649bc86a..6499807af72a1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c @@ -50,8 +50,31 @@ static void update_dpia_stream_allocation_table(struct dc_link *link, DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", status, mst_alloc_slots, prev_mst_slots_in_use); - ASSERT(link_enc); - link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); + if (link_enc) + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} + +static void set_dio_dpia_link_test_pattern(struct dc_link *link, + const struct link_resource *link_res, + struct encoder_set_dp_phy_pattern_param *tp_params) +{ + if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE) + return; + + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + if (!link_enc) + return; + + link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params); + link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN); +} + +static void set_dio_dpia_lane_settings(struct dc_link *link, + const struct link_resource *link_res, + const struct dc_link_settings *link_settings, + const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]) +{ } static const struct link_hwss dpia_link_hwss = { @@ -65,8 +88,8 @@ static const struct link_hwss dpia_link_hwss = { .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, - .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, - .set_dp_lane_settings = 
set_dio_dp_lane_settings, + .set_dp_link_test_pattern = set_dio_dpia_link_test_pattern, + .set_dp_lane_settings = set_dio_dpia_lane_settings, .update_stream_allocation_table = update_dpia_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 391dbe81534da..e026c728042a5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -48,6 +48,9 @@ #include "dm_helpers.h" #include "clk_mgr.h" + // Offset DPCD 050Eh == 0x5A +#define MST_HUB_ID_0x5A 0x5A + #define DC_LOGGER \ link->ctx->logger #define DC_LOGGER_INIT(logger) @@ -692,6 +695,15 @@ static void apply_dpia_mst_dsc_always_on_wa(struct dc_link *link) link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) link->wa_flags.dpia_mst_dsc_always_on = true; + + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->type == dc_connection_mst_branch && + link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + link->dpcd_caps.branch_vendor_specific_data[2] == MST_HUB_ID_0x5A && + link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && + !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) { + link->wa_flags.dpia_mst_dsc_always_on = true; + } } static void revert_dpia_mst_dsc_always_on_wa(struct dc_link *link) @@ -1189,8 +1201,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, //sink only can use supported link rate table, we are foreced to enable it if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) link->panel_config.ilr.optimize_edp_link_rate = true; - if (edp_is_ilr_optimization_enabled(link)) - link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); + link->reported_link_cap.link_rate = get_max_edp_link_rate(link); } } else { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index d6550b904b164..5d90096df60a9 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -721,6 +721,11 @@ static void set_avmute(struct pipe_ctx *pipe_ctx, bool enable) if (!dc_is_hdmi_signal(pipe_ctx->stream->signal)) return; +#if defined(CONFIG_DRM_AMD_DC_HDMI2_1) + if (pipe_ctx->stream->timing.flags.DSC) + return; +#endif + dc->hwss.set_avmute(pipe_ctx, enable); } @@ -1302,6 +1307,85 @@ static void remove_stream_from_alloc_table( } } +#ifndef HAVE_DRM_DP_REMOVE_RAYLOAD_PART +static enum dc_status deallocate_mst_payload_with_temp_drm_wa( + struct pipe_ctx *pipe_ctx) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + struct dc_dp_mst_stream_allocation_table proposed_table = {0}; + struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); + int i; + bool mst_mode = (link->type == dc_connection_mst_branch); + /* adjust for drm changes*/ + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + const struct dc_link_settings empty_link_settings = {0}; + DC_LOGGER_INIT(link->ctx->logger); + + if (link_hwss->ext.set_throttled_vcp_size) + link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp); + if (link_hwss->ext.set_hblank_min_symbol_width) + link_hwss->ext.set_hblank_min_symbol_width(pipe_ctx, + &empty_link_settings, + avg_time_slots_per_mtp); + + if (dm_helpers_dp_mst_write_payload_allocation_table( + stream->ctx, + stream, + 
&proposed_table, + false)) + update_mst_stream_alloc_table( + link, + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.hpo_dp_stream_enc, + &proposed_table); + else + DC_LOG_WARNING("Failed to update " + "MST allocation table for " + "pipe idx:%d\n", + pipe_ctx->pipe_idx); + + DC_LOG_MST("%s " + "stream_count: %d: ", + __func__, + link->mst_stream_alloc_table.stream_count); + + for (i = 0; i < MAX_CONTROLLER_NUM; i++) { + DC_LOG_MST("stream_enc[%d]: %p " + "stream[%d].hpo_dp_stream_enc: %p " + "stream[%d].vcp_id: %d " + "stream[%d].slot_count: %d\n", + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + i, + (void *) link->mst_stream_alloc_table.stream_allocations[i].hpo_dp_stream_enc, + i, + link->mst_stream_alloc_table.stream_allocations[i].vcp_id, + i, + link->mst_stream_alloc_table.stream_allocations[i].slot_count); + } + + if (link_hwss->ext.update_stream_allocation_table == NULL || + link_dp_get_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { + DC_LOG_DEBUG("Unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } + + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, + &link->mst_stream_alloc_table); + + if (mst_mode) { + dm_helpers_dp_mst_poll_for_allocation_change_trigger( + stream->ctx, + stream); + } + + dm_helpers_dp_mst_update_mst_mgr_for_deallocation(stream->ctx, stream); + + return DC_OK; +} +#endif + static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; @@ -1314,6 +1398,10 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct dc_link_settings empty_link_settings = {0}; DC_LOGGER_INIT(link->ctx->logger); +#ifndef HAVE_DRM_DP_REMOVE_RAYLOAD_PART + if (link->dc->debug.temp_mst_deallocation_sequence) + return deallocate_mst_payload_with_temp_drm_wa(pipe_ctx); +#endif /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link * stream[] yet.
For this, payload is not allocated yet, so de-alloc @@ -1906,7 +1994,6 @@ static void disable_link_dp(struct dc_link *link, if (signal == SIGNAL_TYPE_DISPLAY_PORT_MST) /* set the sink to SST mode after disabling the link */ enable_mst_on_sink(link, false); - if (link_dp_get_encoding_format(&link_settings) == DP_8b_10b_ENCODING) { dp_set_fec_enable(link, false); @@ -2082,6 +2169,9 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link_settings->link_rate == LINK_RATE_LOW) skip_video_pattern = false; + if (stream->sink_patches.oled_optimize_display_on) + set_default_brightness_aux(link); + if (perform_link_training_with_retries(link_settings, skip_video_pattern, lt_attempts, @@ -2105,10 +2195,14 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_default_brightness_aux(link); - if (link->dpcd_sink_ext_caps.bits.oled == 1) - msleep(bl_oled_enable_delay); - edp_backlight_enable_aux(link, true); + if (!stream->sink_patches.oled_optimize_display_on) { + set_default_brightness_aux(link); + if (link->dpcd_sink_ext_caps.bits.oled == 1) + msleep(bl_oled_enable_delay); + edp_backlight_enable_aux(link, true); + } else { + edp_backlight_enable_aux(link, true); + } } return status; @@ -2358,7 +2452,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, false); @@ -2532,7 +2626,6 @@ void link_set_dpms_on( dc_is_virtual_signal(pipe_ctx->stream->signal)) link_set_dsc_enable(pipe_ctx, true); } - status = enable_link(state, pipe_ctx); if (status != DC_OK) { @@ -2591,7 +2684,7 @@ void link_set_dpms_on( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 8246006857b30..5e1b5ab9fbc63 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -385,7 +385,7 @@ static void link_destruct(struct dc_link *link) if (link->panel_cntl) link->panel_cntl->funcs->destroy(&link->panel_cntl); - if (link->link_enc) { + if (link->link_enc && !link->is_dig_mapping_flexible) { /* Update link encoder resource tracking variables. These are used for * the dynamic assignment of link encoders to streams. Virtual links * are not assigned encoder resources on creation. 
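Editor's note on the enable_link_dp() hunk above: for sinks carrying the oled_optimize_display_on quirk, the default-brightness write moves ahead of link training, and the post-training path skips the OLED power-on delay. A minimal sketch of the resulting ordering, using only identifiers from the hunk (surrounding code abbreviated):

	/* Before link training: program default brightness once for
	 * panels with the oled_optimize_display_on sink patch. */
	if (stream->sink_patches.oled_optimize_display_on)
		set_default_brightness_aux(link);

	/* ... perform_link_training_with_retries(...) ... */

	/* After training: legacy panels keep the old sequence; quirked
	 * panels only enable the AUX backlight. */
	if (link->dpcd_sink_ext_caps.bits.oled == 1 ||
	    link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 ||
	    link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) {
		if (!stream->sink_patches.oled_optimize_display_on) {
			set_default_brightness_aux(link);
			if (link->dpcd_sink_ext_caps.bits.oled == 1)
				msleep(bl_oled_enable_delay);
			edp_backlight_enable_aux(link, true);
		} else {
			edp_backlight_enable_aux(link, true);
		}
	}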
@@ -524,6 +524,7 @@ static bool construct_phy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 1aed55b0ab6a0..cd654db1ab3ed 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -136,11 +136,9 @@ static bool dp_active_dongle_validate_timing( return false; } } - if (dpcd_caps->channel_coding_cap.bits.DP_128b_132b_SUPPORTED == 0 && dpcd_caps->dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_PASSTHROUGH_SUPPORT == 0 && dongle_caps->dfp_cap_ext.supported) { - if (dongle_caps->dfp_cap_ext.max_pixel_rate_in_mps < (timing->pix_clk_100hz / 10000)) return false; @@ -287,6 +285,13 @@ static bool dp_validate_mode_timing( req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link)); max_bw = dp_link_bandwidth_kbps(link, link_setting); + bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid && + timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000; + + if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) { + return false; + } + if (req_bw <= max_bw) { /* remember the biggest mode here, during * initial link training (to get diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 59c9dde108850..8c7f8b3e57a25 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -212,6 +212,13 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in case 10000000: link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane break; + case 13500000: + link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane + break; + case 20000000: + link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane + break; + default: link_rate = LINK_RATE_UNKNOWN; break; @@ -541,6 +548,23 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link, } } +static void increase_edp_link_rate(struct dc_link *link, + struct dc_link_settings *current_link_setting) +{ + if (current_link_setting->use_link_rate_set) { + if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting->link_rate_set++; + current_link_setting->link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set]; + } else { + current_link_setting->use_link_rate_set = false; + current_link_setting->link_rate = LINK_RATE_UHBR10; + } + } else { + current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate); + } +} + static bool decide_fallback_link_setting_max_bw_policy( struct dc_link *link, const struct dc_link_settings *max, @@ -759,19 +783,13 @@ bool edp_decide_link_settings(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - 
current_link_setting.lane_count = - initial_link_setting.lane_count; - } else - break; + increase_edp_link_rate(link, ¤t_link_setting); } } return false; } + bool decide_edp_link_settings_with_dsc(struct dc_link *link, struct dc_link_settings *link_setting, uint32_t req_bw, @@ -818,9 +836,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, if (policy) { /* minimize lane */ if (current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { @@ -839,9 +855,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); current_link_setting.lane_count = initial_link_setting.lane_count; } @@ -874,18 +888,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, } if (policy) { /* minimize lane */ - if (current_link_setting.link_rate_set < - link->dpcd_caps.edp_supported_link_rates_count - && current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + if (current_link_setting.link_rate < max_link_rate) { + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { current_link_setting.lane_count = increase_lane_count( current_link_setting.lane_count); current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set; current_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; } else @@ -899,13 +910,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else + increase_edp_link_rate(link, ¤t_link_setting); + if (current_link_setting.link_rate == LINK_RATE_UNKNOWN) break; } } @@ -1202,6 +1208,13 @@ static void get_active_converter_info( dp_hw_fw_revision.ieee_fw_rev, sizeof(dp_hw_fw_revision.ieee_fw_rev)); } + + core_link_read_dpcd( + link, + DP_BRANCH_VENDOR_SPECIFIC_START, + (uint8_t *)link->dpcd_caps.branch_vendor_specific_data, + sizeof(link->dpcd_caps.branch_vendor_specific_data)); + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_14 && link->dpcd_caps.dongle_type != DISPLAY_DONGLE_NONE) { union dp_dfp_cap_ext dfp_cap_ext; @@ -1937,6 +1950,11 @@ static bool retrieve_link_cap(struct dc_link *link) DC_LOG_DP2("\tFEC aggregated error counters are supported"); } + core_link_read_dpcd(link, + DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP, + link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw, + sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); + retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); @@ -2045,6 +2063,14 @@ void detect_edp_sink_caps(struct dc_link *link) core_link_read_dpcd(link, 
DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE, &link->dpcd_caps.pr_info.max_deviation_line, sizeof(link->dpcd_caps.pr_info.max_deviation_line)); + + /* + * OLED Emission Rate info + */ + if (link->dpcd_sink_ext_caps.bits.emission_output) + core_link_read_dpcd(link, DP_SINK_EMISSION_RATE, + (uint8_t *)&link->dpcd_caps.edp_oled_emission_rate, + sizeof(link->dpcd_caps.edp_oled_emission_rate)); } bool dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h index 1eb0619d6710e..c67665395712b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h @@ -48,10 +48,8 @@ void dp_set_drive_settings( struct dc_link *link, const struct link_resource *link_res, struct link_training_settings *lt_settings); - enum dc_status dp_set_fec_ready(struct dc_link *link, const struct link_resource *link_res, bool ready); - void dp_set_fec_enable(struct dc_link *link, bool enable); void dpcd_write_rx_power_ctrl(struct dc_link *link, bool on); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 988999c444754..27a606f73213c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -515,6 +515,41 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status) return align_status.bits.INTERLANE_ALIGN_DONE == 1; } +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries) +{ + /* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4 + * has to share encoders unlike DP and USBC + */ + return (dp_is_interlane_aligned(align_status) || + (link->skip_fallback_on_link_loss && retries)); +} + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries) +{ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) + return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY); + else + return dpia_get_eq_aux_rd_interval(link, lt_settings, offset); + } else if (is_repeater(lt_settings, offset)) + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + else + return lt_settings->eq_pattern_time; +} + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status) +{ + return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA); +} + enum link_training_result dp_check_link_loss_status( struct dc_link *link, const struct link_training_settings *link_training_setting) @@ -740,6 +775,7 @@ void override_training_settings( lt_settings->pattern_for_eq = *overrides->pattern_for_eq; if (overrides->enhanced_framing != NULL) lt_settings->enhanced_framing = *overrides->enhanced_framing; + if (link->preferred_training_settings.fec_enable != NULL) lt_settings->should_set_fec_ready = *link->preferred_training_settings.fec_enable; @@ -973,13 +1009,17 @@ void repeater_training_done(struct dc_link *link, uint32_t offset) dpcd_pattern.v1_4.TRAINING_PATTERN_SET); } -static void dpcd_exit_training_mode(struct dc_link *link, enum 
dp_link_encoding encoding) +static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding) { + enum dc_status status; uint8_t sink_status = 0; uint8_t i; /* clear training pattern set */ - dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); + + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; if (encoding == DP_128b_132b_ENCODING) { /* poll for intra-hop disable */ @@ -990,6 +1030,8 @@ static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding fsleep(1000); } } + + return LINK_TRAINING_SUCCESS; } enum dc_status dpcd_configure_channel_coding(struct dc_link *link, @@ -1013,17 +1055,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link, return status; } -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern) { + enum dc_status status; union dpcd_training_pattern dpcd_pattern = {0}; dpcd_pattern.v1_4.TRAINING_PATTERN_SET = dp_training_pattern_to_dpcd_training_pattern( link, training_pattern); - core_link_write_dpcd( + status = core_link_write_dpcd( link, DP_TRAINING_PATTERN_SET, &dpcd_pattern.raw, @@ -1033,6 +1076,8 @@ void dpcd_set_training_pattern( __func__, DP_TRAINING_PATTERN_SET, dpcd_pattern.v1_4.TRAINING_PATTERN_SET); + + return status; } enum dc_status dpcd_set_link_settings( @@ -1185,6 +1230,13 @@ void dpcd_set_lt_pattern_and_lane_settings( dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET] = dpcd_pattern.raw; + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_set_tps_notification( + link, + lt_settings, + dpcd_pattern.v1_4.TRAINING_PATTERN_SET, + offset); + if (is_repeater(lt_settings, offset)) { DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n", __func__, @@ -1455,7 +1507,8 @@ static enum link_training_result dp_transition_to_video_idle( */ if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) { msleep(5); - status = dp_check_link_loss_status(link, lt_settings); + if (!link->skip_fallback_on_link_loss) + status = dp_check_link_loss_status(link, lt_settings); } return status; } @@ -1521,7 +1574,9 @@ enum link_training_result dp_perform_link_training( ASSERT(0); /* exit training mode */ - dpcd_exit_training_mode(link, encoding); + if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) && + link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + dpia_training_abort(link, <_settings, 0); /* switch to video idle */ if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern) @@ -1599,8 +1654,7 @@ bool perform_link_training_with_retries( dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings); return true; } else { - /** @todo Consolidate USB4 DP and DPx.x training. 
*/ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { status = dpia_perform_link_training( link, &pipe_ctx->link_res, @@ -1629,8 +1683,17 @@ bool perform_link_training_with_retries( dp_trace_lt_total_count_increment(link, false); dp_trace_lt_result_update(link, status, false); dp_trace_set_lt_end_timestamp(link, false); - if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) + if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) { + // Update verified link settings to current one + // Because DPIA LT might fallback to lower link setting. + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + link->verified_link_cap.link_rate = link->cur_link_settings.link_rate; + link->verified_link_cap.lane_count = link->cur_link_settings.lane_count; + dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link); + } return true; + } } fail_count++; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 851bd17317a0c..0b18aa35c33cb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -55,7 +55,7 @@ void dp_set_hw_test_pattern( uint8_t *custom_pattern, uint32_t custom_pattern_size); -void dpcd_set_training_pattern( +enum dc_status dpcd_set_training_pattern( struct dc_link *link, enum dc_dp_training_pattern training_pattern); @@ -182,4 +182,18 @@ uint32_t dp_translate_training_aux_read_interval( uint8_t dp_get_nibble_at_index(const uint8_t *buf, uint32_t index); + +bool dp_check_interlane_aligned(union lane_align_status_updated align_status, + struct dc_link *link, + uint8_t retries); + +uint32_t dp_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t offset, + uint8_t retries); + +bool dp_check_dpcd_reqeust_status(const struct dc_link *link, + enum dc_status status); + #endif /* __DC_LINK_DP_TRAINING_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 2b4c15b0b4070..3bdce32a85e3c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -157,6 +157,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; uint32_t retries_cr; uint32_t retry_count; uint32_t wait_time_microsec; @@ -216,7 +217,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( /* 4. Read lane status and requested drive * settings as set by the sink */ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -224,6 +225,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. 
check CR done*/ if (dp_is_cr_done(lane_count, dpcd_lane_status)) { DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__); @@ -273,6 +277,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( struct link_training_settings *lt_settings, uint32_t offset) { + enum dc_status status; enum dc_dp_training_pattern tr_pattern; uint32_t retries_ch_eq; uint32_t wait_time_microsec; @@ -308,12 +313,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_set_lane_settings(link, lt_settings, offset); /* 3. wait for receiver to lock-on*/ - wait_time_microsec = lt_settings->eq_pattern_time; - - if (is_repeater(lt_settings, offset)) - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]); + wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq); dp_wait_for_training_aux_rd_interval( link, @@ -322,7 +322,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 4. Read lane status and requested * drive settings as set by the sink*/ - dp_get_lane_status_and_lane_adjust( + status = dp_get_lane_status_and_lane_adjust( link, lt_settings, dpcd_lane_status, @@ -330,6 +330,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( dpcd_lane_adjust, offset); + if (dp_check_dpcd_reqeust_status(link, status)) + return LINK_TRAINING_ABORT; + /* 5. check CR done*/ if (!dp_is_cr_done(lane_count, dpcd_lane_status)) return dpcd_lane_status[0].bits.CR_DONE_0 ? @@ -339,7 +342,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence( /* 6. check CHEQ done*/ if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) && dp_is_symbol_locked(lane_count, dpcd_lane_status) && - dp_is_interlane_aligned(dpcd_lane_status_updated)) + dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq)) return LINK_TRAINING_SUCCESS; /* 7. update VS/PE/PC2 in lt_settings*/ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index cd1975c03f38d..39e4b7dc9588f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -43,9 +43,6 @@ #define DC_LOGGER \ link->ctx->logger -/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ -#define DPIA_CLK_SYNC_DELAY 16000 - /* Extend interval between training status checks for manual testing. */ #define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000 @@ -566,28 +563,6 @@ static enum link_training_result dpia_training_cr_phase( return result; } -/* Return status read interval during equalization phase. */ -static uint32_t dpia_get_eq_aux_rd_interval( - const struct dc_link *link, - const struct link_training_settings *lt_settings, - uint32_t hop) -{ - uint32_t wait_time_microsec; - - if (hop == DPRX) - wait_time_microsec = lt_settings->eq_pattern_time; - else - wait_time_microsec = - dp_translate_training_aux_read_interval( - link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); - - /* Check debug option for extending aux read interval. */ - if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) - wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; - - return wait_time_microsec; -} - /* Execute equalization phase of link training for specified hop in display * path in non-transparent mode: * - driver issues both DPCD and SET_CONFIG transactions. 
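Editor's note: the clock-recovery and channel-equalization hunks above share one pattern -- the DPCD lane-status read now captures its AUX status, and for USB4 DPIA endpoints an AUX failure aborts training instead of burning retries on a dead tunnel. A sketch of that pattern (argument list as in the surrounding function; dp_check_dpcd_reqeust_status keeps its in-tree spelling):

	status = dp_get_lane_status_and_lane_adjust(
			link,
			lt_settings,
			dpcd_lane_status,
			&dpcd_lane_status_updated,
			dpcd_lane_adjust,
			offset);

	/* true only when the AUX transaction failed and the endpoint
	 * is DISPLAY_ENDPOINT_USB4_DPIA */
	if (dp_check_dpcd_reqeust_status(link, status))
		return LINK_TRAINING_ABORT;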
@@ -936,6 +911,22 @@ static enum link_training_result dpia_training_end( return result; } +/* Return status read interval during equalization phase. */ +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop) +{ + /* Check debug option for extending aux read interval. */ + if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval) + return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US; + else if (hop == DPRX) + return lt_settings->eq_pattern_time; + else + return dp_translate_training_aux_read_interval( + link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]); +} + /* When aborting training of specified hop in display path, clean up by: * - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET. * - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0. @@ -943,7 +934,7 @@ static enum link_training_result dpia_training_end( * @param link DPIA link being trained. * @param hop Hop in display path. DPRX = 0. */ -static void dpia_training_abort( +void dpia_training_abort( struct dc_link *link, struct link_training_settings *lt_settings, uint32_t hop) @@ -968,7 +959,26 @@ static void dpia_training_abort( core_link_write_dpcd(link, dpcd_tps_offset, &data, 1); core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1); core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1); - core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); + + if (!link->dc->config.consolidated_dpia_dp_lt) + core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data); +} + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t hop) +{ + uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */ + + if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT || pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE) + return; + + repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + + if (hop != repeater_cnt) + dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern); } enum link_training_result dpia_perform_link_training( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h index b39fb9faf1c2c..9f4eceb494c2d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h @@ -28,6 +28,9 @@ #define __DC_LINK_DP_TRAINING_DPIA_H__ #include "link_dp_training.h" +/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */ +#define DPIA_CLK_SYNC_DELAY 16000 + /* Train DP tunneling link for USB4 DPIA display endpoint. * DPIA equivalent of dc_link_dp_perfrorm_link_training. * Aborts link training upon detection of sink unplug. 
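Editor's note: the call site for the newly exported dpia_set_tps_notification() is the dpcd_set_lt_pattern_and_lane_settings() hunk earlier in this diff; the helper itself gates on non-transparent LTTPR mode and a real (non-video-idle) training pattern before notifying DMUB. Sketch of the caller, copied from that hunk:

	/* In dpcd_set_lt_pattern_and_lane_settings(), after the training
	 * pattern byte has been staged for the DPCD write: */
	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
		dpia_set_tps_notification(
			link,
			lt_settings,
			dpcd_pattern.v1_4.TRAINING_PATTERN_SET,
			offset);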
@@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training( const struct dc_link_settings *link_setting, bool skip_video_pattern); +void dpia_training_abort( + struct dc_link *link, + struct link_training_settings *lt_settings, + uint32_t hop); + +uint32_t dpia_get_eq_aux_rd_interval( + const struct dc_link *link, + const struct link_training_settings *lt_settings, + uint32_t hop); + +void dpia_set_tps_notification( + struct dc_link *link, + const struct link_training_settings *lt_settings, + uint8_t pattern, + uint32_t offset); + #endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf820d2b4dc4a..43a467f6ce7bd 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -157,31 +157,11 @@ bool edp_set_backlight_level_nits(struct dc_link *link, uint32_t backlight_millinits, uint32_t transition_time_in_ms) { - struct dpcd_source_backlight_set dpcd_backlight_set; - uint8_t backlight_control = isHDR ? 1 : 0; - if (!link || (link->connector_signal != SIGNAL_TYPE_EDP && link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; - // OLEDs have no PWM, they can only use AUX - if (link->dpcd_sink_ext_caps.bits.oled == 1) - backlight_control = 1; - - *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; - *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; - - - if (!link->dpcd_caps.panel_luminance_control) { - if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, - (uint8_t *)(&dpcd_backlight_set), - sizeof(dpcd_backlight_set)) != DC_OK) - return false; - - if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL, - &backlight_control, 1) != DC_OK) - return false; - } else { + if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX) { uint8_t backlight_enable = 0; struct target_luminance_value *target_luminance = NULL; @@ -205,6 +185,24 @@ bool edp_set_backlight_level_nits(struct dc_link *link, (uint8_t *)(target_luminance), sizeof(struct target_luminance_value)) != DC_OK) return false; + } else { + struct dpcd_source_backlight_set dpcd_backlight_set; + *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; + *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; + + uint8_t backlight_control = isHDR ? 
1 : 0; + // OLEDs have no PWM, they can only use AUX + if (link->dpcd_sink_ext_caps.bits.oled == 1) + backlight_control = 1; + + if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, + (uint8_t *)(&dpcd_backlight_set), + sizeof(dpcd_backlight_set)) != DC_OK) + return false; + + if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL, + &backlight_control, 1) != DC_OK) + return false; } return true; @@ -305,16 +303,17 @@ bool edp_is_ilr_optimization_enabled(struct dc_link *link) return true; } -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link) { - enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN; + enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate; for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { - if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) - link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i]) + max_ilr_rate = link->dpcd_caps.edp_supported_link_rates[i]; } - return link_rate; + return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate); } bool edp_is_ilr_optimization_required(struct dc_link *link, @@ -522,13 +521,13 @@ bool edp_set_backlight_level(const struct dc_link *link, uint32_t frame_ramp) { struct dc *dc = link->ctx->dc; - DC_LOGGER_INIT(link->ctx->logger); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { struct pipe_ctx *pipe_ctx = get_pipe_from_link(link); + struct set_backlight_level_params backlight_level_param = { 0 }; if (link->panel_cntl) link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16; @@ -543,10 +542,12 @@ bool edp_set_backlight_level(const struct dc_link *link, return false; } + backlight_level_param.backlight_pwm_u16_16 = backlight_pwm_u16_16; + backlight_level_param.frame_ramp = frame_ramp; + dc->hwss.set_backlight_level( pipe_ctx, - backlight_pwm_u16_16, - frame_ramp); + &backlight_level_param); } return true; } @@ -939,8 +940,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream struct replay_context replay_context = { 0 }; unsigned int lineTimeInNs = 0; - - union replay_enable_and_configuration replay_config; + union replay_enable_and_configuration replay_config = { 0 }; union dpcd_alpm_configuration alpm_config; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 8df8ac5bde5b1..30dc8c24c008c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,7 +69,7 @@ bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); bool edp_is_ilr_optimization_enabled(struct dc_link *link); -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h 
index b7a57f98553d7..40757f20d73f4 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -202,6 +202,7 @@ struct dcn_optc_registers { uint32_t OPTC_CLOCK_CONTROL; uint32_t OPTC_WIDTH_CONTROL2; uint32_t OTG_PSTATE_REGISTER; + uint32_t OTG_PIPE_UPDATE_STATUS; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -566,6 +567,12 @@ struct dcn_optc_registers { type OTG_H_TIMING_DIV_MODE_DB_UPDATE_PENDING;\ type OPTC_DOUBLE_BUFFER_PENDING;\ +#define TG_REG_FIELD_LIST_DCN2_0(type) \ + type OTG_FLIP_PENDING;\ + type OTG_DC_REG_UPDATE_PENDING;\ + type OTG_CURSOR_UPDATE_PENDING;\ + type OTG_VUPDATE_KEEPOUT_STATUS;\ + #define TG_REG_FIELD_LIST_DCN3_2(type) \ type OTG_H_TIMING_DIV_MODE_MANUAL; @@ -600,6 +607,7 @@ struct dcn_optc_registers { struct dcn_optc_shift { TG_REG_FIELD_LIST(uint8_t) + TG_REG_FIELD_LIST_DCN2_0(uint8_t) TG_REG_FIELD_LIST_DCN3_2(uint8_t) TG_REG_FIELD_LIST_DCN3_5(uint8_t) TG_REG_FIELD_LIST_DCN401(uint8_t) @@ -607,6 +615,7 @@ struct dcn_optc_shift { struct dcn_optc_mask { TG_REG_FIELD_LIST(uint32_t) + TG_REG_FIELD_LIST_DCN2_0(uint32_t) TG_REG_FIELD_LIST_DCN3_2(uint32_t) TG_REG_FIELD_LIST_DCN3_5(uint32_t) TG_REG_FIELD_LIST_DCN401(uint32_t) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h index 364034b190281..928e110b95fb5 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h @@ -43,7 +43,8 @@ SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ SR(DWB_SOURCE_SELECT),\ SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst), \ - SRI(OTG_DRR_CONTROL, OTG, inst) + SRI(OTG_DRR_CONTROL, OTG, inst),\ + SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst) #define TG_COMMON_MASK_SH_LIST_DCN2_0(mask_sh)\ TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\ @@ -53,6 +54,10 @@ SF(OTG0_OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, mask_sh),\ SF(OTG0_OTG_GLOBAL_CONTROL2, DIG_UPDATE_LOCATION, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_RANGE_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\ SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_START_X, mask_sh),\ SF(OTG0_OTG_GSL_WINDOW_X, OTG_GSL_WINDOW_END_X, mask_sh), \ SF(OTG0_OTG_GSL_WINDOW_Y, OTG_GSL_WINDOW_START_Y, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c index abcd03d786684..4c95c09586122 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c @@ -271,6 +271,48 @@ void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c optc1->opp_count = opp_cnt; } +/* OTG status register that indicates OPTC update is pending */ +bool optc3_get_optc_double_buffer_pending(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t update_pending = 0; + + REG_GET(OPTC_INPUT_GLOBAL_CONTROL, + OPTC_DOUBLE_BUFFER_PENDING, + &update_pending); + + return (update_pending == 1); +} + +/* OTG status register that indicates OTG update is pending */ +bool optc3_get_otg_update_pending(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t update_pending = 0; + + REG_GET(OTG_DOUBLE_BUFFER_CONTROL, + 
OTG_UPDATE_PENDING, + &update_pending); + + return (update_pending == 1); +} + +/* OTG status register that indicates surface update is pending */ +bool optc3_get_pipe_update_pending(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t flip_pending = 0; + uint32_t dc_update_pending = 0; + + REG_GET_2(OTG_PIPE_UPDATE_STATUS, + OTG_FLIP_PENDING, + &flip_pending, + OTG_DC_REG_UPDATE_PENDING, + &dc_update_pending); + + return (flip_pending == 1 || dc_update_pending == 1); +} + /** * optc3_set_timing_double_buffer() - DRR double buffering control * @@ -375,6 +417,9 @@ static struct timing_generator_funcs dcn30_tg_funcs = { .get_hw_timing = optc1_get_hw_timing, .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, + .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending, + .get_otg_double_buffer_pending = optc3_get_otg_update_pending, + .get_pipe_update_pending = optc3_get_pipe_update_pending, }; void dcn30_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h index bda974d432ea6..e2303f9eaf13b 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h @@ -109,7 +109,8 @@ SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ - SR(DWB_SOURCE_SELECT) + SR(DWB_SOURCE_SELECT),\ + SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst) #define DCN30_VTOTAL_REGS_SF(mask_sh) @@ -209,6 +210,7 @@ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ + SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_DOUBLE_BUFFER_PENDING, mask_sh),\ SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ @@ -319,7 +321,11 @@ SF(OTG0_OTG_DRR_V_TOTAL_CHANGE, OTG_DRR_V_TOTAL_CHANGE_LIMIT, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_PENDING, mask_sh),\ - SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh) + SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\ void dcn30_timing_generator_init(struct optc *optc1); @@ -356,4 +362,7 @@ void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc); void optc3_tg_init(struct timing_generator *optc); void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max); +bool optc3_get_optc_double_buffer_pending(struct timing_generator *optc); +bool optc3_get_otg_update_pending(struct timing_generator *optc); +bool optc3_get_pipe_update_pending(struct timing_generator *optc); #endif /* __DC_OPTC_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c index 
1a22ae89fb555..d7a45ef2d01b3 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c @@ -169,6 +169,9 @@ static struct timing_generator_funcs dcn30_tg_funcs = { .get_hw_timing = optc1_get_hw_timing, .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, + .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending, + .get_otg_double_buffer_pending = optc3_get_otg_update_pending, + .get_pipe_update_pending = optc3_get_pipe_update_pending, }; void dcn301_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h index 30b81a448ce2d..fbbe86d00c2e3 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h @@ -99,7 +99,8 @@ SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ SRI(OTG_CRC_CNTL2, OTG, inst),\ SR(DWB_SOURCE_SELECT),\ - SRI(OTG_DRR_CONTROL, OTG, inst) + SRI(OTG_DRR_CONTROL, OTG, inst),\ + SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst) #define OPTC_COMMON_MASK_SH_LIST_DCN3_1(mask_sh)\ SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ @@ -254,7 +255,11 @@ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\ void dcn31_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h index 99c098e76116f..0ff72b97b465c 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h @@ -98,7 +98,8 @@ SRI(OPTC_BYTES_PER_PIXEL, ODM, inst),\ SRI(OPTC_WIDTH_CONTROL, ODM, inst),\ SRI(OPTC_MEMORY_CONFIG, ODM, inst),\ - SRI(OTG_DRR_CONTROL, OTG, inst) + SRI(OTG_DRR_CONTROL, OTG, inst),\ + SRI(OTG_PIPE_UPDATE_STATUS, OTG, inst) #define OPTC_COMMON_MASK_SH_LIST_DCN3_14(mask_sh)\ SF(OTG0_OTG_VSTARTUP_PARAM, VSTARTUP_START, mask_sh),\ @@ -248,7 +249,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh),\ void dcn314_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c index 00094f0e84706..c217f653b3c81 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c +++ 
b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c @@ -297,18 +297,6 @@ static void optc32_set_drr( optc32_setup_manual_trigger(optc); } -bool optc32_get_double_buffer_pending(struct timing_generator *optc) -{ - struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t update_pending = 0; - - REG_GET(OPTC_INPUT_GLOBAL_CONTROL, - OPTC_DOUBLE_BUFFER_PENDING, - &update_pending); - - return (update_pending == 1); -} - static struct timing_generator_funcs dcn32_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -373,7 +361,9 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, - .get_double_buffer_pending = optc32_get_double_buffer_pending, + .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending, + .get_otg_double_buffer_pending = optc3_get_otg_update_pending, + .get_pipe_update_pending = optc3_get_pipe_update_pending, }; void dcn32_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h index 665d7c52f67cd..0b0964a9da748 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h @@ -177,7 +177,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh) void dcn32_timing_generator_init(struct optc *optc1); void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode); @@ -185,6 +189,5 @@ void optc32_get_odm_combine_segments(struct timing_generator *tg, int *odm_combi void optc32_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing); void optc32_wait_odm_doublebuffer_pending_clear(struct timing_generator *tg); -bool optc32_get_double_buffer_pending(struct timing_generator *optc); #endif /* __DC_OPTC_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h index d077e2392379c..be749ab41dce7 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h @@ -67,7 +67,11 @@ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL_READBACK, OTG_CRC1_WINDOWB_Y_END_READBACK, mask_sh),\ SF(OPTC_CLOCK_CONTROL, OPTC_FGCG_REP_DIS, mask_sh),\ SF(OTG0_OTG_V_COUNT_STOP_CONTROL, OTG_V_COUNT_STOP, mask_sh),\ - SF(OTG0_OTG_V_COUNT_STOP_CONTROL2, OTG_V_COUNT_STOP_TIMER, mask_sh) + SF(OTG0_OTG_V_COUNT_STOP_CONTROL2, OTG_V_COUNT_STOP_TIMER, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh) void dcn35_timing_generator_init(struct optc 
*optc1); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index a5d6a7dca554c..db670fc172644 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -493,7 +493,9 @@ static struct timing_generator_funcs dcn401_tg_funcs = { .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, .is_two_pixels_per_container = optc1_is_two_pixels_per_container, - .get_double_buffer_pending = optc32_get_double_buffer_pending, + .get_optc_double_buffer_pending = optc3_get_optc_double_buffer_pending, + .get_otg_double_buffer_pending = optc3_get_otg_update_pending, + .get_pipe_update_pending = optc3_get_pipe_update_pending, }; void dcn401_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h index bb13a645802d0..1be89571986ff 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -159,7 +159,11 @@ SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\ SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\ SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\ - SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh) + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_FLIP_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_DC_REG_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_CURSOR_UPDATE_PENDING, mask_sh),\ + SF(OTG0_OTG_PIPE_UPDATE_STATUS, OTG_VUPDATE_KEEPOUT_STATUS, mask_sh) void dcn401_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index eea2b3b307cd5..afc02462f191f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1057,7 +1057,6 @@ void dcn20_clock_source_destroy(struct clock_source **clk_src) *clk_src = NULL; } - struct display_stream_compressor *dcn20_dsc_create( struct dc_context *ctx, uint32_t inst) { @@ -1079,7 +1078,6 @@ void dcn20_dsc_destroy(struct display_stream_compressor **dsc) *dsc = NULL; } - static void dcn20_resource_destruct(struct dcn20_resource_pool *pool) { unsigned int i; @@ -1321,7 +1319,6 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state return status; } - void dcn20_acquire_dsc(const struct dc *dc, struct resource_context *res_ctx, struct display_stream_compressor **dsc, @@ -1371,8 +1368,6 @@ void dcn20_release_dsc(struct resource_context *res_ctx, } } - - enum dc_status dcn20_add_dsc_to_stream_resource(struct dc *dc, struct dc_state *dc_ctx, struct dc_stream_state *dc_stream) @@ -1429,7 +1424,6 @@ static enum dc_status remove_dsc_from_stream_resource(struct dc *dc, return DC_OK; } - enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream) { enum dc_status result = DC_ERROR_UNEXPECTED; @@ -1511,6 +1505,7 @@ bool dcn20_split_stream_for_odm( if (prev_odm_pipe->plane_state) { struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; + struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp; int new_width; /* HACTIVE halved for odm combine */ @@ -1544,7 +1539,28 @@ 
bool dcn20_split_stream_for_odm( sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( sd->ratios.horz_c, sd->h_active - sd->recout.x)); sd->recout.x = 0; + + /* + * When odm is used in YCbCr422 or 420 colour space, a split screen + * will be seen with the previous calculations since the extra left + * edge pixel is accounted for in fmt but not in viewport. + * + * The calculations below fix the split by accounting for the extra + * left edge pixel when it is present. + */ + if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count + && opp->funcs->opp_get_left_edge_extra_pixel_count( + opp, next_odm_pipe->stream->timing.pixel_encoding, + resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) { + sd->h_active += 1; + sd->recout.width += 1; + sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + } } + if (!next_odm_pipe->top_pipe) next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx]; else @@ -1555,7 +1571,6 @@ bool dcn20_split_stream_for_odm( if (next_odm_pipe->stream_res.dsc == NULL) return false; } - return true; } @@ -2133,6 +2148,7 @@ bool dcn20_fast_validate_bw( ASSERT(0); } } + /* Actual dsc count per stream dsc validation */ if (!dcn20_validate_dsc(dc, context)) { context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] = diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 347e6aaea582f..7d82b75ed66b9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1086,7 +1086,6 @@ static void read_dce_straps( } - static struct display_stream_compressor *dcn21_dsc_create(struct dc_context *ctx, uint32_t inst) { @@ -1298,7 +1297,7 @@ static struct link_encoder *dcn21_link_encoder_create( kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL); int link_regs_id; - if (!enc21) + if (!enc21 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) return NULL; link_regs_id = diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 5040a4c6ed186..75cc84473a577 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2354,6 +2354,7 @@ static bool dcn30_resource_construct( dc->caps.dp_hdmi21_pcon_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 7d04739c3ba14..806e563e165b5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -1238,7 +1238,6 @@ static struct display_stream_compressor *dcn301_dsc_create( return &dsc->base; } - static void dcn301_destroy_resource_pool(struct resource_pool **pool) { struct dcn301_resource_pool *dcn301_pool = TO_DCN301_RES_POOL(*pool); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index
5791b5cc28752..320b040d591d1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1234,6 +1234,7 @@ static bool dcn302_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index 63f0f882c8610..297cf4b5600da 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1179,6 +1179,7 @@ static bool dcn303_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 3f4b9dba41124..f6b840f046a5d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1812,6 +1812,11 @@ static void dcn315_get_panel_config_defaults(struct dc_panel_config *panel_confi *panel_config = panel_config_defaults; } +static int dcn315_get_power_profile(const struct dc_state *context) +{ + return !context->bw_ctx.bw.dcn.clk.p_state_change_support; +} + static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; @@ -1840,6 +1845,7 @@ static struct resource_funcs dcn315_res_pool_funcs = { .update_bw_bounding_box = dcn315_update_bw_bounding_box, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn315_get_panel_config_defaults, + .get_power_profile = dcn315_get_power_profile, }; static bool dcn315_resource_construct( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 5fd52c5fcee45..bed951e7da867 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -557,6 +557,7 @@ static const struct dcn30_mmhubbub_mask mcif_wb30_mask = { MCIF_WB_COMMON_MASK_SH_LIST_DCN30(_MASK) }; + #define dsc_regsDCN20(id)\ [id] = {\ DSC_REG_LIST_DCN20(id)\ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index a124ad9bd108c..21ee6c3180645 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1990,6 +1990,10 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned return 0; } + if (dc->caps.max_cab_allocation_bytes == 0) { + return 0xffffffff; + } + /* add 2 lines for worst case alignment */ cache_lines_used = total_size_in_mall_bytes / dc->caps.cache_line_size + 2; @@ -2186,6 +2190,7 @@ static bool dcn32_resource_construct( dc->caps.dmcub_support = true; dc->caps.seamless_odm = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff 
--git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index 7901792afb7b3..86c6e5e8c42eb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -1054,7 +1054,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(OPTC_BYTES_PER_PIXEL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PIPE_UPDATE_STATUS, OTG, inst) /* HUBP */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 827a94f84f100..1617bdfba0025 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -445,7 +445,6 @@ static const struct dcn20_dsc_shift dsc_shift = { static const struct dcn20_dsc_mask dsc_mask = { DSC_REG_LIST_SH_MASK_DCN20(_MASK) }; - static struct dcn30_mpc_registers mpc_regs; #define dcn_mpc_regs_init()\ MPC_REG_LIST_DCN3_2_RI(0),\ @@ -1374,7 +1373,6 @@ static void dcn321_resource_destruct(struct dcn321_resource_pool *pool) if (pool->base.dscs[i] != NULL) dcn20_dsc_destroy(&pool->base.dscs[i]); } - if (pool->base.mpc != NULL) { kfree(TO_DCN20_MPC(pool->base.mpc)); pool->base.mpc = NULL; @@ -1743,6 +1741,7 @@ static bool dcn321_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 5f3705f97bd74..ed3238edaf791 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -786,6 +786,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .disable_timeout = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { @@ -1849,6 +1850,7 @@ static bool dcn35_resource_construct( dc->caps.zstate_support = true; dc->caps.ips_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; @@ -2154,6 +2156,7 @@ static bool dcn35_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 4c5e722baa3a6..c274861e83c73 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = { .hdmichar = true, .dpstream = true, .symclk32_se = true, - .symclk32_le = true, + .symclk32_le = false, .symclk_fe = true, .physymclk = 
false, .dpiasymclk = true, @@ -766,6 +766,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .notify_dpia_hr_bw = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { @@ -1828,6 +1829,7 @@ static bool dcn351_resource_construct( dc->caps.zstate_support = true; dc->caps.ips_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; @@ -2133,6 +2135,7 @@ static bool dcn351_resource_construct( dc->dml2_options.max_segments_per_hubp = 24; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/ + dc->dml2_options.override_det_buffer_size_kbytes = true; if (dc->config.sdpif_request_limit_words_per_umc == 0) dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 02e63b95c36d3..2b79a8783f41c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -76,6 +76,9 @@ #include "dml2/dml2_wrapper.h" +#include "spl/dc_spl_scl_easf_filters.h" +#include "spl/dc_spl_isharp_filters.h" + #define DC_LOGGER_INIT(logger) enum dcn401_clk_src_array_id { @@ -1685,6 +1688,45 @@ static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) } } +static int dcn401_get_power_profile(const struct dc_state *context) +{ + int uclk_mhz = context->bw_ctx.bw.dcn.clk.dramclk_khz / 1000; + int dpm_level = 0; + + for (int i = 0; i < context->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) { + if (context->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz == 0 || + uclk_mhz < context->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz) + break; + if (uclk_mhz > context->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz) + dpm_level++; + } + + return dpm_level; +} + +static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans) +{ + unsigned int num_available_chans = 1; + + /* channels for MALL must be a power of 2 */ + while (num_chans > 1) { + num_available_chans = (num_available_chans << 1); + num_chans = (num_chans >> 1); + } + + /* cannot be odd */ + num_available_chans &= ~1; + + /* clamp to max available channels for MALL per ASIC */ + if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 16 ? 16 : num_available_chans; + } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) { + num_available_chans = num_available_chans > 8 ? 
8 : num_available_chans; + } + + return num_available_chans; +} + static struct resource_funcs dcn401_res_pool_funcs = { .destroy = dcn401_destroy_resource_pool, .link_enc_create = dcn401_link_encoder_create, @@ -1711,6 +1753,7 @@ static struct resource_funcs dcn401_res_pool_funcs = { .prepare_mcache_programming = dcn401_prepare_mcache_programming, .build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params, .calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes, + .get_power_profile = dcn401_get_power_profile, }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1792,14 +1835,12 @@ static bool dcn401_resource_construct( dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; dc->caps.mall_size_per_mem_channel = 4; - /* total size = mall per channel * num channels * 1024 * 1024 */ - dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; dc->caps.cache_line_size = 64; dc->caps.cache_num_ways = 16; /* Calculate the available MALL space */ - dc->caps.max_cab_allocation_bytes = dcn32_calc_num_avail_chans_for_mall( + dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall( dc, dc->ctx->dc_bios->vram_info.num_chans) * dc->caps.mall_size_per_mem_channel * 1024 * 1024; dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes; @@ -1823,6 +1864,7 @@ static bool dcn401_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.max_v_total = (1 << 15) - 1; + dc->caps.vtotal_limited_by_fp2 = true; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; @@ -1864,6 +1906,7 @@ static bool dcn401_resource_construct( dc->config.prefer_easf = true; dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.set_pipe_unlock_order = true; /* Need to ensure DET gets freed before allocating */ /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { @@ -2126,6 +2169,11 @@ static bool dcn401_resource_construct( dc->dml2_options.max_segments_per_hubp = 20; dc->dml2_options.det_segment_size = DCN4_01_CRB_SEGMENT_SIZE_KB; + /* SPL */ + spl_init_easf_filter_coeffs(); + spl_init_blur_scale_coeffs(); + dc->caps.scl_caps.sharpener_support = true; + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 514d1ce20df9e..19568c3596694 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -536,8 +536,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ - SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst), \ + SRI_ARR(OTG_PIPE_UPDATE_STATUS, OTG, inst) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN4_01_RI(id) \ @@ -609,7 +610,10 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SR(DCHUBBUB_CLOCK_CNTL), \ SR(DCHUBBUB_SDPIF_CFG0), \ SR(DCHUBBUB_SDPIF_CFG1), \ - SR(DCHUBBUB_MEM_PWR_MODE_CTRL) + SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \ + 
SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \ + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2), \ + SR(DCHUBBUB_CTRL_STATUS) /* DCCG */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index f8df85ea4d327..5edf3c6cf3e2d 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_easf_filters.o dc_spl_isharp_filters.o dc_spl_filters.o spl_fixpt31_32.o spl_custom_float.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 9eccdb38bed48..7ceeefac31f72 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -4,9 +4,11 @@ #include "dc_spl.h" #include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" #include "dc_spl_isharp_filters.h" +#include "spl_debug.h" -#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19)) #define MIN_VIEWPORT_SIZE 12 static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1) @@ -107,26 +109,26 @@ static struct spl_rect calculate_plane_rec_in_timing_active( const struct spl_rect *stream_src = &spl_in->basic_out.src_rect; const struct spl_rect *stream_dst = &spl_in->basic_out.dst_rect; struct spl_rect rec_out = {0}; - struct fixed31_32 temp; + struct spl_fixed31_32 temp; - temp = dc_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, + temp = spl_fixpt_from_fraction(rec_in->x * (long long)stream_dst->width, stream_src->width); - rec_out.x = stream_dst->x + dc_fixpt_round(temp); + rec_out.x = stream_dst->x + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->x + rec_in->width) * (long long)stream_dst->width, stream_src->width); - rec_out.width = stream_dst->x + dc_fixpt_round(temp) - rec_out.x; + rec_out.width = stream_dst->x + spl_fixpt_round(temp) - rec_out.x; - temp = dc_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, + temp = spl_fixpt_from_fraction(rec_in->y * (long long)stream_dst->height, stream_src->height); - rec_out.y = stream_dst->y + dc_fixpt_round(temp); + rec_out.y = stream_dst->y + spl_fixpt_round(temp); - temp = dc_fixpt_from_fraction( + temp = spl_fixpt_from_fraction( (rec_in->y + rec_in->height) * (long long)stream_dst->height, stream_src->height); - rec_out.height = stream_dst->y + dc_fixpt_round(temp) - rec_out.y; + rec_out.height = stream_dst->y + spl_fixpt_round(temp) - rec_out.y; return rec_out; } @@ -144,7 +146,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; mpc_rec.height = plane_clip_rec->height; mpc_rec.y = plane_clip_rec->y; - ASSERT(mpc_slice_count == 1 || + SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -157,7 +159,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( } if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) { - ASSERT(mpc_rec.height % 2 == 0); + SPL_ASSERT(mpc_rec.height % 2 == 0); mpc_rec.height /= 2; } return mpc_rec; @@ -197,7 +199,7 @@ static struct spl_rect 
calculate_odm_slice_in_timing_active(struct spl_in *spl_i return spl_in->basic_out.odm_slice_rect; } -static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_recout(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out) { /* * A plane clip represents the desired plane size and position in Stream @@ -340,20 +342,23 @@ static void spl_calculate_recout(struct spl_in *spl_in, struct spl_out *spl_out) /* shift the overlapping area so it is with respect to current * ODM slice's position */ - spl_out->scl_data.recout = shift_rec( + spl_scratch->scl_data.recout = shift_rec( &overlapping_area, -odm_slice.x, -odm_slice.y); - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_base_offset; - spl_out->scl_data.recout.height -= + spl_scratch->scl_data.recout.height -= spl_in->debug.visual_confirm_dpp_offset; } else /* if there is no overlap, zero recout */ - memset(&spl_out->scl_data.recout, 0, + memset(&spl_scratch->scl_data.recout, 0, sizeof(struct spl_rect)); } + /* Calculate scaling ratios */ -static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_scaling_ratios(struct spl_in *spl_in, + struct spl_scratch *spl_scratch, + struct spl_out *spl_out) { const int in_w = spl_in->basic_out.src_rect.width; const int in_h = spl_in->basic_out.src_rect.height; @@ -364,59 +369,75 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in, struct spl_out * /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) - swap(surf_src.height, surf_src.width); + spl_swap(surf_src.height, surf_src.width); - spl_out->scl_data.ratios.horz = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.horz = spl_fixpt_from_fraction( surf_src.width, spl_in->basic_in.dst_rect.width); - spl_out->scl_data.ratios.vert = dc_fixpt_from_fraction( + spl_scratch->scl_data.ratios.vert = spl_fixpt_from_fraction( surf_src.height, spl_in->basic_in.dst_rect.height); if (spl_in->basic_out.view_format == SPL_VIEW_3D_SIDE_BY_SIDE) - spl_out->scl_data.ratios.horz.value *= 2; + spl_scratch->scl_data.ratios.horz.value *= 2; else if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) - spl_out->scl_data.ratios.vert.value *= 2; + spl_scratch->scl_data.ratios.vert.value *= 2; - spl_out->scl_data.ratios.vert.value = div64_s64( - spl_out->scl_data.ratios.vert.value * in_h, out_h); - spl_out->scl_data.ratios.horz.value = div64_s64( - spl_out->scl_data.ratios.horz.value * in_w, out_w); + spl_scratch->scl_data.ratios.vert.value = spl_div64_s64( + spl_scratch->scl_data.ratios.vert.value * in_h, out_h); + spl_scratch->scl_data.ratios.horz.value = spl_div64_s64( + spl_scratch->scl_data.ratios.horz.value * in_w, out_w); - spl_out->scl_data.ratios.horz_c = spl_out->scl_data.ratios.horz; - spl_out->scl_data.ratios.vert_c = spl_out->scl_data.ratios.vert; + spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz; + spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert; if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) { - spl_out->scl_data.ratios.horz_c.value /= 2; - spl_out->scl_data.ratios.vert_c.value /= 2; + spl_scratch->scl_data.ratios.horz_c.value /= 2; + spl_scratch->scl_data.ratios.vert_c.value /= 2; } - 
spl_out->scl_data.ratios.horz = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz, 19); - spl_out->scl_data.ratios.vert = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert, 19); - spl_out->scl_data.ratios.horz_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.horz_c, 19); - spl_out->scl_data.ratios.vert_c = dc_fixpt_truncate( - spl_out->scl_data.ratios.vert_c, 19); + spl_scratch->scl_data.ratios.horz = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz, 19); + spl_scratch->scl_data.ratios.vert = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert, 19); + spl_scratch->scl_data.ratios.horz_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.horz_c, 19); + spl_scratch->scl_data.ratios.vert_c = spl_fixpt_truncate( + spl_scratch->scl_data.ratios.vert_c, 19); + + /* + * Coefficient table and some registers are different based on ratio + * that is output/input. Currently we calculate input/output + * Store 1/ratio in recip_ratio for those lookups + */ + spl_scratch->scl_data.recip_ratios.horz = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz); + spl_scratch->scl_data.recip_ratios.vert = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert); + spl_scratch->scl_data.recip_ratios.horz_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.horz_c); + spl_scratch->scl_data.recip_ratios.vert_c = spl_fixpt_recip( + spl_scratch->scl_data.ratios.vert_c); } + /* Calculate Viewport size */ -static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_viewport_size(struct spl_in *spl_in, struct spl_scratch *spl_scratch) { - spl_out->scl_data.viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert, - spl_out->scl_data.recout.height)); - spl_out->scl_data.viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.horz_c, - spl_out->scl_data.recout.width)); - spl_out->scl_data.viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(spl_out->scl_data.ratios.vert_c, - spl_out->scl_data.recout.height)); + spl_scratch->scl_data.viewport.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert, + spl_scratch->scl_data.recout.height)); + spl_scratch->scl_data.viewport_c.width = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.horz_c, + spl_scratch->scl_data.recout.width)); + spl_scratch->scl_data.viewport_c.height = spl_fixpt_ceil(spl_fixpt_mul_int(spl_scratch->scl_data.ratios.vert_c, + spl_scratch->scl_data.recout.height)); if (spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_90 || spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_270) { - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } } + static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, bool horizontal_mirror, bool *orthogonal_rotation, @@ -440,6 +461,7 @@ static void spl_get_vp_scan_direction(enum spl_rotation_angle rotation, if (horizontal_mirror) *flip_horz_scan_dir = !*flip_horz_scan_dir; } + /* * We completely calculate vp offset, size and 
inits here based entirely on scaling * ratios and recout for pixel perfect pipe combine. @@ -449,13 +471,13 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, int recout_size, int src_size, int taps, - struct fixed31_32 ratio, - struct fixed31_32 init_adj, - struct fixed31_32 *init, + struct spl_fixed31_32 ratio, + struct spl_fixed31_32 init_adj, + struct spl_fixed31_32 *init, int *vp_offset, int *vp_size) { - struct fixed31_32 temp; + struct spl_fixed31_32 temp; int int_part; /* @@ -468,33 +490,33 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, * init_bot = init + scaling_ratio * to get pixel perfect combine add the fraction from calculating vp offset */ - temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); - *vp_offset = dc_fixpt_floor(temp); + temp = spl_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = spl_fixpt_floor(temp); temp.value &= 0xffffffff; - *init = dc_fixpt_add(dc_fixpt_div_int(dc_fixpt_add_int(ratio, taps + 1), 2), temp); - *init = dc_fixpt_add(*init, init_adj); - *init = dc_fixpt_truncate(*init, 19); + *init = spl_fixpt_add(spl_fixpt_div_int(spl_fixpt_add_int(ratio, taps + 1), 2), temp); + *init = spl_fixpt_add(*init, init_adj); + *init = spl_fixpt_truncate(*init, 19); /* * If viewport has non 0 offset and there are more taps than covered by init then * we should decrease the offset and increase init so we are never sampling * outside of viewport. */ - int_part = dc_fixpt_floor(*init); + int_part = spl_fixpt_floor(*init); if (int_part < taps) { int_part = taps - int_part; if (int_part > *vp_offset) int_part = *vp_offset; *vp_offset -= int_part; - *init = dc_fixpt_add_int(*init, int_part); + *init = spl_fixpt_add_int(*init, int_part); } /* * If taps are sampling outside of viewport at end of recout and there are more pixels * available in the surface we should increase the viewport size, regardless set vp to * only what is used. */ - temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); - *vp_size = dc_fixpt_floor(temp); + temp = spl_fixpt_add(*init, spl_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = spl_fixpt_floor(temp); if (*vp_size + *vp_offset > src_size) *vp_size = src_size - *vp_offset; @@ -509,15 +531,24 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir, static bool spl_is_yuv420(enum spl_pixel_format format) { - if ((format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN) && - (format <= SPL_PIXEL_FORMAT_VIDEO_END)) + if ((format >= SPL_PIXEL_FORMAT_420BPP8) && + (format <= SPL_PIXEL_FORMAT_420BPP10)) + return true; + + return false; +} + +static bool spl_is_rgb8(enum spl_pixel_format format) +{ + if (format == SPL_PIXEL_FORMAT_ARGB8888) return true; return false; } /*Calculate inits and viewport */ -static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) { struct spl_rect src = spl_in->basic_in.src_rect; struct spl_rect recout_dst_in_active_timing; @@ -528,11 +559,11 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8 || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 
2 : 1; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - struct fixed31_32 init_adj_h = dc_fixpt_zero; - struct fixed31_32 init_adj_v = dc_fixpt_zero; + struct spl_fixed31_32 init_adj_h = spl_fixpt_zero; + struct spl_fixed31_32 init_adj_v = spl_fixpt_zero; recout_clip_in_active_timing = shift_rec( - &spl_out->scl_data.recout, odm_slice.x, odm_slice.y); + &spl_scratch->scl_data.recout, odm_slice.x, odm_slice.y); recout_dst_in_active_timing = calculate_plane_rec_in_timing_active( spl_in, &spl_in->basic_in.dst_rect); overlap_in_active_timing = intersect_rec(&recout_clip_in_active_timing, @@ -555,8 +586,8 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ &flip_horz_scan_dir); if (orthogonal_rotation) { - swap(src.width, src.height); - swap(flip_vert_scan_dir, flip_horz_scan_dir); + spl_swap(src.width, src.height); + spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); } if (spl_is_yuv420(spl_in->basic_in.format)) { @@ -568,17 +599,17 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ switch (spl_in->basic_in.cositing) { case CHROMA_COSITING_LEFT: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_NONE: - init_adj_h = dc_fixpt_from_fraction(sign, 2); - init_adj_v = dc_fixpt_from_fraction(sign, 2); + init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_v = spl_fixpt_from_fraction(sign, 4); break; case CHROMA_COSITING_TOPLEFT: default: - init_adj_h = dc_fixpt_zero; - init_adj_v = dc_fixpt_zero; + init_adj_h = spl_fixpt_zero; + init_adj_v = spl_fixpt_zero; break; } } @@ -586,59 +617,60 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_ spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width, - spl_out->scl_data.taps.h_taps, - spl_out->scl_data.ratios.horz, - dc_fixpt_zero, - &spl_out->scl_data.inits.h, - &spl_out->scl_data.viewport.x, - &spl_out->scl_data.viewport.width); + spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.ratios.horz, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.h, + &spl_scratch->scl_data.viewport.x, + &spl_scratch->scl_data.viewport.width); spl_calculate_init_and_vp( flip_horz_scan_dir, recout_clip_in_recout_dst.x, - spl_out->scl_data.recout.width, + spl_scratch->scl_data.recout.width, src.width / vpc_div, - spl_out->scl_data.taps.h_taps_c, - spl_out->scl_data.ratios.horz_c, + spl_scratch->scl_data.taps.h_taps_c, + spl_scratch->scl_data.ratios.horz_c, init_adj_h, - &spl_out->scl_data.inits.h_c, - &spl_out->scl_data.viewport_c.x, - &spl_out->scl_data.viewport_c.width); + &spl_scratch->scl_data.inits.h_c, + &spl_scratch->scl_data.viewport_c.x, + &spl_scratch->scl_data.viewport_c.width); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height, - spl_out->scl_data.taps.v_taps, - spl_out->scl_data.ratios.vert, - dc_fixpt_zero, - &spl_out->scl_data.inits.v, - &spl_out->scl_data.viewport.y, - &spl_out->scl_data.viewport.height); + spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.ratios.vert, + spl_fixpt_zero, + &spl_scratch->scl_data.inits.v, + &spl_scratch->scl_data.viewport.y, + &spl_scratch->scl_data.viewport.height); spl_calculate_init_and_vp( flip_vert_scan_dir, recout_clip_in_recout_dst.y, - 
spl_out->scl_data.recout.height, + spl_scratch->scl_data.recout.height, src.height / vpc_div, - spl_out->scl_data.taps.v_taps_c, - spl_out->scl_data.ratios.vert_c, + spl_scratch->scl_data.taps.v_taps_c, + spl_scratch->scl_data.ratios.vert_c, init_adj_v, - &spl_out->scl_data.inits.v_c, - &spl_out->scl_data.viewport_c.y, - &spl_out->scl_data.viewport_c.height); + &spl_scratch->scl_data.inits.v_c, + &spl_scratch->scl_data.viewport_c.y, + &spl_scratch->scl_data.viewport_c.height); if (orthogonal_rotation) { - swap(spl_out->scl_data.viewport.x, spl_out->scl_data.viewport.y); - swap(spl_out->scl_data.viewport.width, spl_out->scl_data.viewport.height); - swap(spl_out->scl_data.viewport_c.x, spl_out->scl_data.viewport_c.y); - swap(spl_out->scl_data.viewport_c.width, spl_out->scl_data.viewport_c.height); + spl_swap(spl_scratch->scl_data.viewport.x, spl_scratch->scl_data.viewport.y); + spl_swap(spl_scratch->scl_data.viewport.width, spl_scratch->scl_data.viewport.height); + spl_swap(spl_scratch->scl_data.viewport_c.x, spl_scratch->scl_data.viewport_c.y); + spl_swap(spl_scratch->scl_data.viewport_c.width, spl_scratch->scl_data.viewport_c.height); } - spl_out->scl_data.viewport.x += src.x; - spl_out->scl_data.viewport.y += src.y; - ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); - spl_out->scl_data.viewport_c.x += src.x / vpc_div; - spl_out->scl_data.viewport_c.y += src.y / vpc_div; + spl_scratch->scl_data.viewport.x += src.x; + spl_scratch->scl_data.viewport.y += src.y; + SPL_ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + spl_scratch->scl_data.viewport_c.x += src.x / vpc_div; + spl_scratch->scl_data.viewport_c.y += src.y / vpc_div; } + static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) { /* @@ -647,7 +679,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ if (spl_in->basic_in.mpc_combine_v) { - ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || + SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); if (spl_in->basic_out.view_format == SPL_VIEW_3D_TOP_AND_BOTTOM) @@ -665,6 +697,7 @@ static void spl_clamp_viewport(struct spl_rect *viewport) if (viewport->width < MIN_VIEWPORT_SIZE) viewport->width = MIN_VIEWPORT_SIZE; } + static bool spl_dscl_is_420_format(enum spl_pixel_format format) { if (format == SPL_PIXEL_FORMAT_420BPP8 || @@ -673,6 +706,7 @@ static bool spl_dscl_is_420_format(enum spl_pixel_format format) else return false; } + static bool spl_dscl_is_video_format(enum spl_pixel_format format) { if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN @@ -681,17 +715,21 @@ static bool spl_dscl_is_video_format(enum spl_pixel_format format) else return false; } + static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, - const struct spl_scaler_data *data) + const struct spl_scaler_data *data, + bool enable_isharp, bool enable_easf) { - const long long one = dc_fixpt_one.value; + const long long one = spl_fixpt_one.value; enum spl_pixel_format pixel_format = spl_in->basic_in.format; + /* Bypass if ratio is 1:1 with no ISHARP or force scale on */ if (data->ratios.horz.value == one && data->ratios.vert.value == one && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one - && !spl_in->basic_out.always_scale) + && !spl_in->basic_out.always_scale + && !enable_isharp) return 
SCL_MODE_SCALING_444_BYPASS; if (!spl_dscl_is_420_format(pixel_format)) { @@ -700,69 +738,248 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, else return SCL_MODE_SCALING_444_RGB_ENABLE; } - if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return SCL_MODE_SCALING_420_LUMA_BYPASS; - if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return SCL_MODE_SCALING_420_CHROMA_BYPASS; + + /* + * Bypass YUV if Y is 1:1 with no ISHARP + * Do not bypass UV at 1:1 for cositing to be applied + */ + if (!enable_isharp) { + if (data->ratios.horz.value == one && data->ratios.vert.value == one) + return SCL_MODE_SCALING_420_LUMA_BYPASS; + } return SCL_MODE_SCALING_420_YCBCR_ENABLE; } -/* Calculate optimal number of taps */ -static bool spl_get_optimal_number_of_taps( - int max_downscale_src_width, struct spl_in *spl_in, struct spl_out *spl_out, - const struct spl_taps *in_taps) + +static bool spl_choose_lls_policy(enum spl_pixel_format format, + enum spl_transfer_func_type tf_type, + enum spl_transfer_func_predefined tf_predefined_type, + enum linear_light_scaling *lls_pref) { - int num_part_y, num_part_c; - int max_taps_y, max_taps_c; - int min_taps_y, min_taps_c; - enum lb_memory_config lb_config; + if (spl_is_yuv420(format)) { + *lls_pref = LLS_PREF_NO; + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) + return true; + } else { /* RGB or YUV444 */ + if ((tf_type == SPL_TF_TYPE_PREDEFINED) || + (tf_type == SPL_TF_TYPE_BYPASS)) { + *lls_pref = LLS_PREF_YES; + return true; + } + } + *lls_pref = LLS_PREF_NO; + return false; +} + +/* Enable EASF ?*/ +static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) +{ + int vratio = 0; + int hratio = 0; + bool skip_easf = false; + bool lls_enable_easf = true; + + if (spl_in->disable_easf) + skip_easf = true; + + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); - if (spl_out->scl_data.viewport.width > spl_out->scl_data.h_active && - max_downscale_src_width != 0 && - spl_out->scl_data.viewport.width > max_downscale_src_width) - return false; /* - * Set default taps if none are provided - * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling - * taps = 4 for upscaling + * No EASF support for downscaling > 2:1 + * EASF support for upscaling or downscaling up to 2:1 */ + if ((vratio > 2) || (hratio > 2)) + skip_easf = true; + + /* + * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer + * function to determine whether to use LINEAR or NONLINEAR scaling + */ + if (spl_in->lls_pref == LLS_PREF_DONT_CARE) + lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, + spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, + &spl_in->lls_pref); + + if (!lls_enable_easf) + skip_easf = true; + + /* Check for linear scaling or EASF preferred */ + if (spl_in->lls_pref != LLS_PREF_YES && !spl_in->prefer_easf) + skip_easf = true; + + return skip_easf; +} + +/* Check if video is in fullscreen mode */ +static bool spl_is_video_fullscreen(struct spl_in *spl_in) +{ + if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen) + return true; + return false; +} + +static bool spl_get_isharp_en(struct spl_in *spl_in, + struct spl_scratch *spl_scratch) +{ + bool enable_isharp = false; + int vratio = 0; + int hratio = 0; + struct spl_taps taps = spl_scratch->scl_data.taps; + bool fullscreen = spl_is_video_fullscreen(spl_in); + 
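/*
 * Aside: a condensed, self-contained sketch of the EASF gating rules that
 * the enable_easf() hunk above encodes. struct easf_query, its fields, and
 * easf_skipped() are illustrative names rather than the driver's real API;
 * the checks mirror the patch's rules: no EASF beyond a 2:1 downscale, and
 * EASF only when linear-light scaling is chosen or explicitly preferred.
 */
#include <stdbool.h>
#include <stdio.h>

struct easf_query {
	bool disable_easf;   /* explicit override from the caller */
	bool prefer_easf;    /* platform prefers EASF when allowed */
	bool lls_yes;        /* linear-light scaling policy resolved to yes */
	int  vratio_ceil;    /* ceil(vertical scaling ratio)   */
	int  hratio_ceil;    /* ceil(horizontal scaling ratio) */
};

static bool easf_skipped(const struct easf_query *q)
{
	if (q->disable_easf)
		return true;
	/* EASF handles upscaling, and downscaling only up to 2:1 */
	if (q->vratio_ceil > 2 || q->hratio_ceil > 2)
		return true;
	/* need either linear-light scaling or an explicit preference */
	if (!q->lls_yes && !q->prefer_easf)
		return true;
	return false;
}

int main(void)
{
	struct easf_query q = {
		.prefer_easf = true, .lls_yes = false,
		.vratio_ceil = 3, .hratio_ceil = 1,
	};

	/* Skipped here: the vertical downscale exceeds 2:1. */
	printf("EASF skipped: %s\n", easf_skipped(&q) ? "yes" : "no");
	return 0;
}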
+ /* Return if adaptive sharpness is disabled */ + if (spl_in->adaptive_sharpness.enable == false) + return enable_isharp; + + vratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz); + + /* No iSHARP support for downscaling */ + if (vratio > 1 || hratio > 1) + return enable_isharp; + + // Scaling is up to 1:1 (no scaling) or upscaling + + /* + * Apply sharpness to RGB and YUV (NV12/P010) + * surfaces based on policy setting + */ + if (!spl_is_yuv420(spl_in->basic_in.format) && + (spl_in->sharpen_policy == SHARPEN_YUV)) + return enable_isharp; + else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && + (spl_in->sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) + return enable_isharp; + else if (!spl_in->is_fullscreen && + spl_in->sharpen_policy == SHARPEN_FULLSCREEN_ALL) + return enable_isharp; + + /* + * Apply sharpness if supports horizontal taps 4,6 AND + * vertical taps 3, 4, 6 + */ + if ((taps.h_taps == 4 || taps.h_taps == 6) && + (taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6)) + enable_isharp = true; + + return enable_isharp; +} + +/* Calculate number of tap with adaptive scaling off */ +static void spl_get_taps_non_adaptive_scaler( + struct spl_scratch *spl_scratch, const struct spl_taps *in_taps) +{ if (in_taps->h_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz) > 1) - spl_out->scl_data.taps.h_taps = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) + spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz), 8); else - spl_out->scl_data.taps.h_taps = 4; + spl_scratch->scl_data.taps.h_taps = 4; } else - spl_out->scl_data.taps.h_taps = in_taps->h_taps; + spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; + if (in_taps->v_taps == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 1) - spl_out->scl_data.taps.v_taps = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert, 2)), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) + spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert, 2)), 8); else - spl_out->scl_data.taps.v_taps = 4; + spl_scratch->scl_data.taps.v_taps = 4; } else - spl_out->scl_data.taps.v_taps = in_taps->v_taps; + spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; + if (in_taps->v_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 1) - spl_out->scl_data.taps.v_taps_c = min(dc_fixpt_ceil(dc_fixpt_mul_int( - spl_out->scl_data.ratios.vert_c, 2)), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) + spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert_c, 2)), 8); else - spl_out->scl_data.taps.v_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; } else - spl_out->scl_data.taps.v_taps_c = in_taps->v_taps_c; + spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; + if (in_taps->h_taps_c == 0) { - if (dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c) > 1) - spl_out->scl_data.taps.h_taps_c = min(2 * dc_fixpt_ceil(spl_out->scl_data.ratios.horz_c), 8); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) + spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz_c), 8); else - spl_out->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.h_taps_c = 4; } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 
1) /* Only 1 and even h_taps_c are supported by hw */ - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; else - spl_out->scl_data.taps.h_taps_c = in_taps->h_taps_c; + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) + spl_scratch->scl_data.taps.h_taps = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)) + spl_scratch->scl_data.taps.v_taps = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)) + spl_scratch->scl_data.taps.h_taps_c = 1; + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)) + spl_scratch->scl_data.taps.v_taps_c = 1; + +} + +/* Calculate optimal number of taps */ +static bool spl_get_optimal_number_of_taps( + int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, + const struct spl_taps *in_taps, bool *enable_easf_v, bool *enable_easf_h, + bool *enable_isharp) +{ + int num_part_y, num_part_c; + int max_taps_y, max_taps_c; + int min_taps_y, min_taps_c; + enum lb_memory_config lb_config; + bool skip_easf = false; + bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format); + + if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && + max_downscale_src_width != 0 && + spl_scratch->scl_data.viewport.width > max_downscale_src_width) { + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + *enable_easf_v = false; + *enable_easf_h = false; + *enable_isharp = false; + return false; + } + + /* Disable adaptive scaler and sharpener when integer scaling is enabled */ + if (spl_in->scaling_quality.integer_scaling) { + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + *enable_easf_v = false; + *enable_easf_h = false; + *enable_isharp = false; + return true; + } + + /* Check if we are using EASF or not */ + skip_easf = enable_easf(spl_in, spl_scratch); + + /* + * Set default taps if none are provided + * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling + * taps = 4 for upscaling + */ + if (skip_easf) + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + else { + if (spl_is_yuv420(spl_in->basic_in.format)) { + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 4; + spl_scratch->scl_data.taps.v_taps_c = 4; + } else { /* RGB */ + spl_scratch->scl_data.taps.h_taps = 6; + spl_scratch->scl_data.taps.v_taps = 6; + spl_scratch->scl_data.taps.h_taps_c = 6; + spl_scratch->scl_data.taps.v_taps_c = 6; + } + } /*Ensure we can support the requested number of vtaps*/ - min_taps_y = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - min_taps_c = dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c); + min_taps_y = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert); + min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) @@ -771,16 +988,16 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_out->scl_data, + spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert) > 2) - max_taps_y = num_part_y - 
(dc_fixpt_ceil(spl_out->scl_data.ratios.vert) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) + max_taps_y = num_part_y - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) - 2); else max_taps_y = num_part_y; - if (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) > 2) - max_taps_c = num_part_c - (dc_fixpt_ceil(spl_out->scl_data.ratios.vert_c) - 2); + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 2) + max_taps_c = num_part_c - (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) - 2); else max_taps_c = num_part_c; @@ -789,53 +1006,117 @@ static bool spl_get_optimal_number_of_taps( else if (max_taps_c < min_taps_c) return false; - if (spl_out->scl_data.taps.v_taps > max_taps_y) - spl_out->scl_data.taps.v_taps = max_taps_y; - - if (spl_out->scl_data.taps.v_taps_c > max_taps_c) - spl_out->scl_data.taps.v_taps_c = max_taps_c; - if (spl_in->prefer_easf) { - // EASF can be enabled only for taps 3,4,6 - // If optimal no of taps is 5, then set it to 4 - // If optimal no of taps is 7 or 8, then set it to 6 - if (spl_out->scl_data.taps.v_taps == 5) - spl_out->scl_data.taps.v_taps = 4; - if (spl_out->scl_data.taps.v_taps == 7 || spl_out->scl_data.taps.v_taps == 8) - spl_out->scl_data.taps.v_taps = 6; - - if (spl_out->scl_data.taps.v_taps_c == 5) - spl_out->scl_data.taps.v_taps_c = 4; - if (spl_out->scl_data.taps.v_taps_c == 7 || spl_out->scl_data.taps.v_taps_c == 8) - spl_out->scl_data.taps.v_taps_c = 6; - - if (spl_out->scl_data.taps.h_taps == 5) - spl_out->scl_data.taps.h_taps = 4; - if (spl_out->scl_data.taps.h_taps == 7 || spl_out->scl_data.taps.h_taps == 8) - spl_out->scl_data.taps.h_taps = 6; - - if (spl_out->scl_data.taps.h_taps_c == 5) - spl_out->scl_data.taps.h_taps_c = 4; - if (spl_out->scl_data.taps.h_taps_c == 7 || spl_out->scl_data.taps.h_taps_c == 8) - spl_out->scl_data.taps.h_taps_c = 6; + if (spl_scratch->scl_data.taps.v_taps > max_taps_y) + spl_scratch->scl_data.taps.v_taps = max_taps_y; + + if (spl_scratch->scl_data.taps.v_taps_c > max_taps_c) + spl_scratch->scl_data.taps.v_taps_c = max_taps_c; + if (!skip_easf) { + /* + * RGB ( L + NL ) and Linear HDR support 6x6, 6x4, 6x3, 4x4, 4x3 + * NL YUV420 only supports 6x6, 6x4 for Y and 4x4 for UV + * + * If LB does not support 3, 4, or 6 taps, then disable EASF_V + * and only enable EASF_H. 
So for RGB, support 6x2, 4x2 + * and for NL YUV420, support 6x2 for Y and 4x2 for UV + * + * All other cases, have to disable EASF_V and EASF_H + * + * If optimal no of taps is 5, then set it to 4 + * If optimal no of taps is 7 or 8, then fine since max tap is 6 + * + */ + if (spl_scratch->scl_data.taps.v_taps == 5) + spl_scratch->scl_data.taps.v_taps = 4; + + if (spl_scratch->scl_data.taps.v_taps_c == 5) + spl_scratch->scl_data.taps.v_taps_c = 4; + + if (spl_scratch->scl_data.taps.h_taps == 5) + spl_scratch->scl_data.taps.h_taps = 4; + + if (spl_scratch->scl_data.taps.h_taps_c == 5) + spl_scratch->scl_data.taps.h_taps_c = 4; + + if (spl_is_yuv420(spl_in->basic_in.format)) { + if (spl_scratch->scl_data.taps.h_taps <= 4) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if (spl_scratch->scl_data.taps.v_taps <= 3) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + SPL_ASSERT((spl_scratch->scl_data.taps.v_taps > 1) && + (spl_scratch->scl_data.taps.v_taps_c > 1)); + } else { /* RGB */ + if (spl_scratch->scl_data.taps.h_taps <= 3) { + *enable_easf_v = false; + *enable_easf_h = false; + } else if (spl_scratch->scl_data.taps.v_taps < 3) { + *enable_easf_v = false; + *enable_easf_h = true; + } else { + *enable_easf_v = true; + *enable_easf_h = true; + } + SPL_ASSERT(spl_scratch->scl_data.taps.v_taps > 1); + } + } else { + *enable_easf_v = false; + *enable_easf_h = false; } // end of if prefer_easf - if (!spl_in->basic_out.always_scale) { - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz)) - spl_out->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert)) - spl_out->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.horz_c)) - spl_out->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_out->scl_data.ratios.vert_c)) - spl_out->scl_data.taps.v_taps_c = 1; + + /* Sharpener requires scaler to be enabled, including for 1:1 + * Check if ISHARP can be enabled + * If ISHARP is not enabled, set taps to 1 if ratio is 1:1 + * except for chroma taps. 
Keep previous taps so it can + * handle cositing + */ + + *enable_isharp = spl_get_isharp_en(spl_in, spl_scratch); + if (!*enable_isharp && !spl_in->basic_out.always_scale) { + if ((IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { + spl_scratch->scl_data.taps.h_taps = 1; + spl_scratch->scl_data.taps.v_taps = 1; + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr) + spl_scratch->scl_data.taps.h_taps_c = 1; + + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr) + spl_scratch->scl_data.taps.v_taps_c = 1; + + *enable_easf_v = false; + *enable_easf_h = false; + } else { + if ((!*enable_easf_h) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz))) + spl_scratch->scl_data.taps.h_taps = 1; + + if ((!*enable_easf_v) && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) + spl_scratch->scl_data.taps.v_taps = 1; + + if ((!*enable_easf_h) && !is_ycbcr && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) + spl_scratch->scl_data.taps.h_taps_c = 1; + + if ((!*enable_easf_v) && !is_ycbcr && + (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) + spl_scratch->scl_data.taps.v_taps_c = 1; + } } return true; } + static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { - bool ycbcr = format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN - && format <= SPL_PIXEL_FORMAT_VIDEO_END; + bool ycbcr = spl_dscl_is_video_format(format); if (ycbcr) { scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; @@ -848,38 +1129,38 @@ static void spl_set_black_color_data(enum spl_pixel_format format, static void spl_set_manual_ratio_init_data(struct dscl_prog_data *dscl_prog_data, const struct spl_scaler_data *scl_data) { - struct fixed31_32 bot; + struct spl_fixed31_32 bot; - dscl_prog_data->ratios.h_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.horz) << 5; - dscl_prog_data->ratios.v_scale_ratio = dc_fixpt_u3d19(scl_data->ratios.vert) << 5; - dscl_prog_data->ratios.h_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.horz_c) << 5; - dscl_prog_data->ratios.v_scale_ratio_c = dc_fixpt_u3d19(scl_data->ratios.vert_c) << 5; + dscl_prog_data->ratios.h_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.horz) << 5; + dscl_prog_data->ratios.v_scale_ratio = spl_fixpt_u3d19(scl_data->ratios.vert) << 5; + dscl_prog_data->ratios.h_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.horz_c) << 5; + dscl_prog_data->ratios.v_scale_ratio_c = spl_fixpt_u3d19(scl_data->ratios.vert_c) << 5; /* * 0.24 format for fraction, first five bits zeroed */ dscl_prog_data->init.h_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.h) << 5; + spl_fixpt_u0d19(scl_data->inits.h) << 5; dscl_prog_data->init.h_filter_init_int = - dc_fixpt_floor(scl_data->inits.h); + spl_fixpt_floor(scl_data->inits.h); dscl_prog_data->init.h_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.h_c) << 5; + spl_fixpt_u0d19(scl_data->inits.h_c) << 5; dscl_prog_data->init.h_filter_init_int_c = - dc_fixpt_floor(scl_data->inits.h_c); + spl_fixpt_floor(scl_data->inits.h_c); dscl_prog_data->init.v_filter_init_frac = - dc_fixpt_u0d19(scl_data->inits.v) << 5; + spl_fixpt_u0d19(scl_data->inits.v) << 5; dscl_prog_data->init.v_filter_init_int = - dc_fixpt_floor(scl_data->inits.v); + spl_fixpt_floor(scl_data->inits.v); dscl_prog_data->init.v_filter_init_frac_c = - dc_fixpt_u0d19(scl_data->inits.v_c) << 5; + spl_fixpt_u0d19(scl_data->inits.v_c) << 5; dscl_prog_data->init.v_filter_init_int_c = - 
dc_fixpt_floor(scl_data->inits.v_c); - - bot = dc_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); - dscl_prog_data->init.v_filter_init_bot_frac = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int = dc_fixpt_floor(bot); - bot = dc_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); - dscl_prog_data->init.v_filter_init_bot_frac_c = dc_fixpt_u0d19(bot) << 5; - dscl_prog_data->init.v_filter_init_bot_int_c = dc_fixpt_floor(bot); + spl_fixpt_floor(scl_data->inits.v_c); + + bot = spl_fixpt_add(scl_data->inits.v, scl_data->ratios.vert); + dscl_prog_data->init.v_filter_init_bot_frac = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int = spl_fixpt_floor(bot); + bot = spl_fixpt_add(scl_data->inits.v_c, scl_data->ratios.vert_c); + dscl_prog_data->init.v_filter_init_bot_frac_c = spl_fixpt_u0d19(bot) << 5; + dscl_prog_data->init.v_filter_init_bot_int_c = spl_fixpt_floor(bot); } static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, @@ -890,79 +1171,28 @@ static void spl_set_taps_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->taps.v_taps_c = scl_data->taps.v_taps_c - 1; dscl_prog_data->taps.h_taps_c = scl_data->taps.h_taps_c - 1; } -static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) -{ - if (taps == 8) - return spl_get_filter_8tap_64p(ratio); - else if (taps == 7) - return spl_get_filter_7tap_64p(ratio); - else if (taps == 6) - return spl_get_filter_6tap_64p(ratio); - else if (taps == 5) - return spl_get_filter_5tap_64p(ratio); - else if (taps == 4) - return spl_get_filter_4tap_64p(ratio); - else if (taps == 3) - return spl_get_filter_3tap_64p(ratio); - else if (taps == 2) - return spl_get_filter_2tap_64p(); - else if (taps == 1) - return NULL; - else { - /* should never happen, bug */ - BREAK_TO_DEBUGGER(); - return NULL; - } -} -static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps, data->ratios.horz); - dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps, data->ratios.vert); - dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( - data->taps.h_taps_c, data->ratios.horz_c); - dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( - data->taps.v_taps_c, data->ratios.vert_c); -} -#ifdef CONFIG_DRM_AMD_DC_FP -static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) -{ - if ((taps == 3) || (taps == 4) || (taps == 6)) - return spl_get_filter_isharp_bs_4tap_64p(); - else { - /* should never happen, bug */ - BREAK_TO_DEBUGGER(); - return NULL; - } -} -static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, - const struct spl_scaler_data *data) -{ - dscl_prog_data->filter_blur_scale_h = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.h_taps); - dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( - data->taps.v_taps); -} -#endif + /* Populate dscl prog data structure from scaler data calculated by SPL */ -static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) +static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *spl_scratch, + struct spl_out *spl_out, bool enable_easf_v, bool enable_easf_h, bool enable_isharp) { struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; - const struct spl_scaler_data *data = &spl_out->scl_data; + const struct spl_scaler_data *data = &spl_scratch->scl_data; struct 
scl_black_color *scl_black_color = &dscl_prog_data->scl_black_color; + bool enable_easf = enable_easf_v || enable_easf_h; + // Set values for recout - dscl_prog_data->recout = spl_out->scl_data.recout; + dscl_prog_data->recout = spl_scratch->scl_data.recout; // Set values for MPC Size - dscl_prog_data->mpc_size.width = spl_out->scl_data.h_active; - dscl_prog_data->mpc_size.height = spl_out->scl_data.v_active; + dscl_prog_data->mpc_size.width = spl_scratch->scl_data.h_active; + dscl_prog_data->mpc_size.height = spl_scratch->scl_data.v_active; // SCL_MODE - Set SCL_MODE data - dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data); + dscl_prog_data->dscl_mode = spl_get_dscl_mode(spl_in, data, enable_isharp, + enable_easf); // SCL_BLACK_COLOR spl_set_black_color_data(spl_in->basic_in.format, scl_black_color); @@ -973,103 +1203,140 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_ou // Set HTaps/VTaps spl_set_taps_data(dscl_prog_data, data); // Set viewport - dscl_prog_data->viewport = spl_out->scl_data.viewport; + dscl_prog_data->viewport = spl_scratch->scl_data.viewport; // Set viewport_c - dscl_prog_data->viewport_c = spl_out->scl_data.viewport_c; + dscl_prog_data->viewport_c = spl_scratch->scl_data.viewport_c; // Set filters data - spl_set_filters_data(dscl_prog_data, data); + spl_set_filters_data(dscl_prog_data, data, enable_easf_v, enable_easf_h); } -/* Enable EASF ?*/ -static bool enable_easf(int scale_ratio, int taps, - enum linear_light_scaling lls_pref, bool prefer_easf) + +/* Calculate C0-C3 coefficients based on HDR_mult */ +static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits) { - // Is downscaling > 6:1 ? - if (scale_ratio > 6) { - // END - No EASF support for downscaling > 6:1 - return false; - } - // Is upscaling or downscaling up to 2:1? - if (scale_ratio <= 2) { - // Is linear scaling or EASF preferred? 
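/*
 * Worked example for spl_calculate_c0_c3_hdr(), illustrative only: the
 * fractions 2126/10000, 7152/10000 and 722/10000 are the BT.709 luma
 * weights, each scaled by 2^14/125 = 131.072 before being packed as
 * fp1.5.10. The HDR multiplier itself is sdr_white_level_nits / 80, carried
 * as an integer scaled by 100; inputs outside the 80..480 nit window fall
 * back to 100, i.e. 1.0, the 80-nit default. A minimal stand-alone sketch
 * of that clamp:
 */
#include <stdio.h>

static unsigned int hdr_multx100(unsigned int nits)
{
	/* mirrors the 80..480 nit window in spl_calculate_c0_c3_hdr() */
	if (nits >= 80 && nits <= 480)
		return nits * 100 / 80;
	return 100;
}

int main(void)
{
	printf("%u\n", hdr_multx100(240)); /* 300 -> HDR_MULT 3.0; C0 ~ 3.0 * 0.2126 * 131.072 */
	printf("%u\n", hdr_multx100(16));  /* 100 -> default multiplier 1.0 */
	return 0;
}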
- if (lls_pref == LLS_PREF_YES || prefer_easf) { - // LB support taps 3, 4, 6 - if (taps == 3 || taps == 4 || taps == 6) { - // END - EASF supported - return true; - } - } - } - // END - EASF not supported - return false; + struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult; + struct spl_fixed31_32 c0_calc, c1_calc, c2_calc; + struct spl_custom_float_format fmt; + uint32_t hdr_multx100_int; + + if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480)) + hdr_multx100_int = sdr_white_level_nits * 100 / 80; + else + hdr_multx100_int = 100; /* default for 80 nits otherwise */ + + hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL); + c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL); + c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL); + c2_mult = spl_fixpt_from_fraction(722LL, 10000LL); + + c0_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c0_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + c1_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c1_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + c2_calc = spl_fixpt_mul(hdr_mult, spl_fixpt_mul(c2_mult, spl_fixpt_from_fraction( + 16384LL, 125LL))); + + fmt.exponenta_bits = 5; + fmt.mantissa_bits = 10; + fmt.sign = true; + + // fp1.5.10, C0 coefficient (LN_rec709: HDR_MULT * 0.212600 * 2^14/125) + spl_convert_to_custom_float_format(c0_calc, &fmt, &dscl_prog_data->easf_matrix_c0); + // fp1.5.10, C1 coefficient (LN_rec709: HDR_MULT * 0.715200 * 2^14/125) + spl_convert_to_custom_float_format(c1_calc, &fmt, &dscl_prog_data->easf_matrix_c1); + // fp1.5.10, C2 coefficient (LN_rec709: HDR_MULT * 0.072200 * 2^14/125) + spl_convert_to_custom_float_format(c2_calc, &fmt, &dscl_prog_data->easf_matrix_c2); + dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + /* Set EASF data */ -static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, - bool enable_easf_v, bool enable_easf_h, enum linear_light_scaling lls_pref, - enum spl_pixel_format format) +static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v, + bool enable_easf_h, enum linear_light_scaling lls_pref, + enum spl_pixel_format format, enum system_setup setup, + uint32_t sdr_white_level_nits) { - if (spl_is_yuv420(format)) /* TODO: 0 = RGB, 1 = YUV */ - dscl_prog_data->easf_matrix_mode = 1; - else - dscl_prog_data->easf_matrix_mode = 0; - + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; if (enable_easf_v) { dscl_prog_data->easf_v_en = true; dscl_prog_data->easf_v_ring = 0; - dscl_prog_data->easf_v_sharp_factor = 1; + dscl_prog_data->easf_v_sharp_factor = 0; dscl_prog_data->easf_v_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_v_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_v_bf3_mode = 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_v_bf3_mode = spl_get_v_bf3_mode( + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ minCoef ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_uptilt = - 0x9F00;// FP1.5.10 [minCoef] (-0.036109167214271) + spl_get_3tap_dntilt_uptilt_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTiltMaxVal ]*/ 
dscl_prog_data->easf_v_ringest_3tap_uptilt_max = - 0x24FE; // FP1.5.10 [upTiltMaxVal] ( 0.904556445553545) + spl_get_3tap_uptilt_maxval(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ dnTiltSlope ]*/ dscl_prog_data->easf_v_ringest_3tap_dntilt_slope = - 0x3940; // FP1.5.10 [dnTiltSlope] ( 0.910488988173371) + spl_get_3tap_dntilt_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt1Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt1_slope = - 0x359C; // FP1.5.10 [upTilt1Slope] ( 0.125620179040899) + spl_get_3tap_uptilt1_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Slope ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_slope = - 0x359C; // FP1.5.10 [upTilt2Slope] ( 0.006786817723568) + spl_get_3tap_uptilt2_slope(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10 [ upTilt2Offset ]*/ dscl_prog_data->easf_v_ringest_3tap_uptilt2_offset = - 0x9F00; // FP1.5.10 [upTilt2Offset] (-0.006139059716651) + spl_get_3tap_uptilt2_offset(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_v_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_v_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_v_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_scratch->scl_data.taps.v_taps, + spl_scratch->scl_data.recip_ratios.vert); dscl_prog_data->easf_v_bf_maxa = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_maxb = 63; //Vertical Max BF value A in U0.6 format.Selected if V_FCNTL == 1 dscl_prog_data->easf_v_bf_mina = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 0 dscl_prog_data->easf_v_bf_minb = 0; //Vertical Min BF value A in U0.6 format.Selected if V_FCNTL == 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_v_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - 
dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_v_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1090,13 +1357,41 @@ static void 
spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_v_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_v_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_v_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_v_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_v_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_v_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_v_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_v_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_v_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_v_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_v_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_v_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_v_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_v_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_v_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_v_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_v_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_v_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_v_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_v_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1115,11 +1410,11 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_v_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - dscl_prog_data->easf_v_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_v_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 
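/*
 * Note on the negative PWL constants (illustrative, not from the patch):
 * the base/slope fields are programmed as raw two's-complement register
 * values, which is why e.g. -50 in a 7-bit S0.6 field reads 0x4E (128 - 50)
 * and -56 in an 11-bit S7.3 field reads 0x7C8 (2048 - 56). A hypothetical
 * helper showing how such constants can be derived:
 */
static unsigned int encode_signed_field(int val, unsigned int bits)
{
	/* keep the low 'bits' bits of the two's-complement representation */
	return (unsigned int)val & ((1u << bits) - 1);
}
/*
 * encode_signed_field(-50, 7)   == 0x4E  (S0.6 base,   e.g. bf3_pwl_base_set4)
 * encode_signed_field(-56, 11)  == 0x7C8 (S7.3 slope,  e.g. bf1_pwl_slope_seg3)
 * encode_signed_field(-512, 11) == 0x600 (S0.10 input, e.g. bf1_pwl_in_seg0)
 */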
dscl_prog_data->easf_v_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_v_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_v_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_v_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } } else dscl_prog_data->easf_v_en = false; @@ -1127,52 +1422,63 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, if (enable_easf_h) { dscl_prog_data->easf_h_en = true; dscl_prog_data->easf_h_ring = 0; - dscl_prog_data->easf_h_sharp_factor = 1; + dscl_prog_data->easf_h_sharp_factor = 0; dscl_prog_data->easf_h_bf1_en = 1; // 1-bit, BF1 calculation enable, 0=disable, 1=enable dscl_prog_data->easf_h_bf2_mode = 0xF; // 4-bit, BF2 calculation mode - dscl_prog_data->easf_h_bf3_mode = - 2; // 2-bit, BF3 chroma mode correction calculation mode - dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control - dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control - dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + /* 2-bit, BF3 chroma mode correction calculation mode */ + dscl_prog_data->easf_h_bf3_mode = spl_get_h_bf3_mode( + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] */ dscl_prog_data->easf_h_ringest_eventap_reduceg1 = - 0x4000; // FP1.5.10; (2.0) Ring reducer gain for 4 or 6-tap mode [H_REDUCER_GAIN4] + spl_get_reducer_gain4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] */ dscl_prog_data->easf_h_ringest_eventap_reduceg2 = - 0x4100; // FP1.5.10; (2.5) Ring reducer gain for 6-tap mode [V_REDUCER_GAIN6] + spl_get_reducer_gain6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain1 = - 0xB058; // FP1.5.10; (-0.135742) Ring gain for 6-tap set to -139/1024 + spl_get_gainRing4(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); + /* FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 */ dscl_prog_data->easf_h_ringest_eventap_gain2 = - 0xA640; // FP1.5.10; (-0.024414) Ring gain for 6-tap set to -25/1024 + spl_get_gainRing6(spl_scratch->scl_data.taps.h_taps, + spl_scratch->scl_data.recip_ratios.horz); dscl_prog_data->easf_h_bf_maxa = 63; //Horz Max BF value A in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_maxb = 63; //Horz Max BF value B in U0.6 format.Selected if H_FCNTL==1 dscl_prog_data->easf_h_bf_mina = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==0 dscl_prog_data->easf_h_bf_minb = 0; //Horz Min BF value B in U0.6 format.Selected if H_FCNTL==1 - dscl_prog_data->easf_h_bf1_pwl_in_seg0 = -512; // S0.10, BF1 PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 - dscl_prog_data->easf_h_bf1_pwl_in_seg1 = -20; // S0.10, BF1 PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 - dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 - 
dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 - dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = -56; // S7.3, BF1 Slope PWL Segment 3 - dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = -48; // S7.3, BF1 Slope PWL Segment 4 - dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = -240; // S7.3, BF1 Slope PWL Segment 5 - dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = -160; // S7.3, BF1 Slope PWL Segment 6 - dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 - dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 if (lls_pref == LLS_PREF_YES) { + dscl_prog_data->easf_h_bf2_flat1_gain = 4; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 8; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 4; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x600; // S0.10, BF1 PWL Segment 0 = -512 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 3; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7EC; // S0.10, BF1 PWL Segment 1 = -20 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 326; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 16; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7C8; // S7.3, BF1 Slope PWL Segment 3 = -56 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 32; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7D0; // S7.3, BF1 Slope PWL Segment 4 = -48 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 48; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x710; // S7.3, BF1 Slope PWL Segment 5 = -240 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 64; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x760; // S7.3, BF1 Slope PWL Segment 6 = -160 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 80; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 
= 0x12C5; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1190,12 +1496,40 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x136B; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0C37; // FP0.6.6, BF3 Input value PWL Segment 4 (0.125 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set4 = -50; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x4E; // S0.6, BF3 Base PWL Segment 4 = -50 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1200; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0CF7; // FP0.6.6, BF3 Input value PWL Segment 5 (1.0 * 125^3) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } else { + dscl_prog_data->easf_h_bf2_flat1_gain = 13; // U1.3, BF2 Flat1 Gain control + dscl_prog_data->easf_h_bf2_flat2_gain = 15; // U4.0, BF2 Flat2 Gain control + dscl_prog_data->easf_h_bf2_roc_gain = 14; // U2.2, Rate Of Change control + + dscl_prog_data->easf_h_bf1_pwl_in_seg0 = 0x440; // S0.10, BF1 PWL Segment 0 = -960 + dscl_prog_data->easf_h_bf1_pwl_base_seg0 = 0; // U0.6, BF1 Base PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_slope_seg0 = 2; // S7.3, BF1 Slope PWL Segment 0 + dscl_prog_data->easf_h_bf1_pwl_in_seg1 = 0x7C4; // S0.10, BF1 PWL Segment 1 = -60 + dscl_prog_data->easf_h_bf1_pwl_base_seg1 = 12; // U0.6, BF1 Base PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_slope_seg1 = 109; // S7.3, BF1 Slope PWL Segment 1 + dscl_prog_data->easf_h_bf1_pwl_in_seg2 = 0; // S0.10, BF1 PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_base_seg2 = 63; // U0.6, BF1 Base PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_slope_seg2 = 0; // S7.3, BF1 Slope PWL Segment 2 + dscl_prog_data->easf_h_bf1_pwl_in_seg3 = 48; // S0.10, BF1 PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_base_seg3 = 63; // U0.6, BF1 Base PWL Segment 3 + dscl_prog_data->easf_h_bf1_pwl_slope_seg3 = 0x7ED; // S7.3, BF1 Slope PWL Segment 3 = -19 + dscl_prog_data->easf_h_bf1_pwl_in_seg4 = 96; // S0.10, BF1 PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_base_seg4 = 56; // U0.6, BF1 Base PWL Segment 4 + dscl_prog_data->easf_h_bf1_pwl_slope_seg4 = 0x7F0; // S7.3, BF1 Slope PWL Segment 4 = -16 + dscl_prog_data->easf_h_bf1_pwl_in_seg5 = 144; // S0.10, BF1 PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_base_seg5 = 50; // U0.6, BF1 Base PWL Segment 5 + dscl_prog_data->easf_h_bf1_pwl_slope_seg5 = 0x7B0; // S7.3, BF1 Slope PWL Segment 5 = -80 + dscl_prog_data->easf_h_bf1_pwl_in_seg6 = 192; // S0.10, BF1 PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_base_seg6 = 20; // U0.6, BF1 Base PWL Segment 6 + dscl_prog_data->easf_h_bf1_pwl_slope_seg6 = 0x7CB; // S7.3, BF1 Slope PWL Segment 6 = -53 + dscl_prog_data->easf_h_bf1_pwl_in_seg7 = 240; // S0.10, BF1 PWL Segment 7 + dscl_prog_data->easf_h_bf1_pwl_base_seg7 = 0; // U0.6, BF1 Base PWL Segment 7 + dscl_prog_data->easf_h_bf3_pwl_in_set0 = 0x000; // FP0.6.6, BF3 Input value PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_base_set0 = 63; // S0.6, BF3 Base PWL Segment 0 dscl_prog_data->easf_h_bf3_pwl_slope_set0 = 0x0000; // FP1.6.6, BF3 Slope PWL Segment 0 @@ -1213,25 +1547,30 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_h_bf3_pwl_slope_set3 = 0x1878; // FP1.6.6, BF3 Slope PWL Segment 3 dscl_prog_data->easf_h_bf3_pwl_in_set4 = 0x0761; // FP0.6.6, BF3 Input value PWL Segment 4 (0.375) - 
dscl_prog_data->easf_h_bf3_pwl_base_set4 = -60; // S0.6, BF3 Base PWL Segment 4 + dscl_prog_data->easf_h_bf3_pwl_base_set4 = 0x44; // S0.6, BF3 Base PWL Segment 4 = -60 dscl_prog_data->easf_h_bf3_pwl_slope_set4 = 0x1760; // FP1.6.6, BF3 Slope PWL Segment 4 dscl_prog_data->easf_h_bf3_pwl_in_set5 = 0x0780; // FP0.6.6, BF3 Input value PWL Segment 5 (0.5) - dscl_prog_data->easf_h_bf3_pwl_base_set5 = -63; // S0.6, BF3 Base PWL Segment 5 + dscl_prog_data->easf_h_bf3_pwl_base_set5 = 0x41; // S0.6, BF3 Base PWL Segment 5 = -63 } // if (lls_pref == LLS_PREF_YES) } else dscl_prog_data->easf_h_en = false; if (lls_pref == LLS_PREF_YES) { dscl_prog_data->easf_ltonl_en = 1; // Linear input - dscl_prog_data->easf_matrix_c0 = - 0x504E; // fp1.5.10, C0 coefficient (LN_BT2020: 0.2627 * (2^14)/125 = 34.43750000) - dscl_prog_data->easf_matrix_c1 = - 0x558E; // fp1.5.10, C1 coefficient (LN_BT2020: 0.6780 * (2^14)/125 = 88.87500000) - dscl_prog_data->easf_matrix_c2 = - 0x47C6; // fp1.5.10, C2 coefficient (LN_BT2020: 0.0593 * (2^14)/125 = 7.77343750) - dscl_prog_data->easf_matrix_c3 = - 0x0; // fp1.5.10, C3 coefficient + if ((setup == HDR_L) && (spl_is_rgb8(format))) { + /* Calculate C0-C3 coefficients based on HDR multiplier */ + spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits); + } else { // HDR_L ( DWM ) and SDR_L + dscl_prog_data->easf_matrix_c0 = + 0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720) + dscl_prog_data->easf_matrix_c1 = + 0x55DC; // fp1.5.10, C1 coefficient (LN_rec709: 0.7152 * (2^14)/125 = 93.74269440) + dscl_prog_data->easf_matrix_c2 = + 0x48BB; // fp1.5.10, C2 coefficient (LN_rec709: 0.0722 * (2^14)/125 = 9.46339840) + dscl_prog_data->easf_matrix_c3 = + 0x0; // fp1.5.10, C3 coefficient + } } else { dscl_prog_data->easf_ltonl_en = 0; // Non-Linear input dscl_prog_data->easf_matrix_c0 = @@ -1243,27 +1582,43 @@ static void spl_set_easf_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->easf_matrix_c3 = 0x0; // fp1.5.10, C3 coefficient } + + if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + dscl_prog_data->easf_matrix_mode = 1; + /* + * 2-bit, BF3 chroma mode correction calculation mode + * Needs to be disabled for YUV420 mode + * Override lookup value + */ + dscl_prog_data->easf_v_bf3_mode = 0; + dscl_prog_data->easf_h_bf3_mode = 0; + } else + dscl_prog_data->easf_matrix_mode = 0; + } + /*Set isharp noise detection */ -static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data) +static void spl_set_isharp_noise_det_mode(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) { // ISHARP_NOISEDET_MODE // 0: 3x5 as VxH // 1: 4x5 as VxH // 2: // 3: 5x5 as VxH - if (dscl_prog_data->taps.v_taps == 6) - dscl_prog_data->isharp_noise_det.mode = 3; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 4) - dscl_prog_data->isharp_noise_det.mode = 1; // ISHARP_NOISEDET_MODE - else if (dscl_prog_data->taps.h_taps == 3) - dscl_prog_data->isharp_noise_det.mode = 0; // ISHARP_NOISEDET_MODE + if (data->taps.v_taps == 6) + dscl_prog_data->isharp_noise_det.mode = 3; + else if (data->taps.v_taps == 4) + dscl_prog_data->isharp_noise_det.mode = 1; + else if (data->taps.v_taps == 3) + dscl_prog_data->isharp_noise_det.mode = 0; }; /* Set Sharpener data */ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, struct adaptive_sharpness adp_sharpness, bool enable_isharp, enum linear_light_scaling lls_pref, enum spl_pixel_format format, - const struct spl_scaler_data *data) + const struct 
spl_scaler_data *data, struct spl_fixed31_32 ratio, + enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy) { /* Turn off sharpener if not required */ if (!enable_isharp) { @@ -1271,11 +1626,19 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, return; } + spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness, + scale_to_sharpness_policy); + memcpy(dscl_prog_data->isharp_delta, spl_get_pregen_filter_isharp_1D_lut(setup), + sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE); + dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; + dscl_prog_data->isharp_en = 1; // ISHARP_EN - dscl_prog_data->isharp_noise_det.enable = 1; // ISHARP_NOISEDET_EN // Set ISHARP_NOISEDET_MODE if htaps = 6-tap - if (dscl_prog_data->taps.h_taps == 6) - spl_set_isharp_noise_det_mode(dscl_prog_data); // ISHARP_NOISEDET_MODE + if (data->taps.h_taps == 6) { + dscl_prog_data->isharp_noise_det.enable = 1; /* ISHARP_NOISEDET_EN */ + spl_set_isharp_noise_det_mode(dscl_prog_data, data); /* ISHARP_NOISEDET_MODE */ + } else + dscl_prog_data->isharp_noise_det.enable = 0; // ISHARP_NOISEDET_EN // Program noise detection threshold dscl_prog_data->isharp_noise_det.uthreshold = 24; // ISHARP_NOISEDET_UTHRE dscl_prog_data->isharp_noise_det.dthreshold = 4; // ISHARP_NOISEDET_DTHRE @@ -1284,48 +1647,86 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->isharp_noise_det.pwl_end_in = 13; // ISHARP_NOISEDET_PWL_END_IN dscl_prog_data->isharp_noise_det.pwl_slope = 1623; // ISHARP_NOISEDET_PWL_SLOPE - if ((lls_pref == LLS_PREF_NO) && !spl_is_yuv420(format)) /* ISHARP_FMT_MODE */ + if (lls_pref == LLS_PREF_NO) /* ISHARP_FMT_MODE */ dscl_prog_data->isharp_fmt.mode = 1; else dscl_prog_data->isharp_fmt.mode = 0; dscl_prog_data->isharp_fmt.norm = 0x3C00; // ISHARP_FMT_NORM dscl_prog_data->isharp_lba.mode = 0; // ISHARP_LBA_MODE - // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 - dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 - dscl_prog_data->isharp_lba.in_seg[1] = 256; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 - dscl_prog_data->isharp_lba.in_seg[2] = 614; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[2] = -20; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 - dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. 
SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 - dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format - dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format - // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 - dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format - dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format - switch (adp_sharpness.sharpness) { - case SHARPNESS_LOW: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_0p5x(); - break; - case SHARPNESS_MID: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_1p0x(); - break; - case SHARPNESS_HIGH: - dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; - default: - BREAK_TO_DEBUGGER(); + + if (setup == SDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 62; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 130; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 450; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x18D; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -115 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 520; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 520; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 520; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else if (setup == HDR_L) { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 32; // ISHARP LBA for Seg 0. 
SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 254; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 559; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x10C; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -244 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 592; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format + } else { + // ISHARP_LBA_PWL_SEG0: ISHARP Local Brightness Adjustment PWL Segment 0 + dscl_prog_data->isharp_lba.in_seg[0] = 0; // ISHARP LBA PWL for Seg 0. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[0] = 0; // ISHARP LBA PWL for Seg 0. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[0] = 40; // ISHARP LBA for Seg 0. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG1: ISHARP LBA PWL Segment 1 + dscl_prog_data->isharp_lba.in_seg[1] = 204; // ISHARP LBA PWL for Seg 1. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[1] = 63; // ISHARP LBA PWL for Seg 1. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[1] = 0; // ISHARP LBA for Seg 1. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG2: ISHARP LBA PWL Segment 2 + dscl_prog_data->isharp_lba.in_seg[2] = 818; // ISHARP LBA PWL for Seg 2. INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[2] = 63; // ISHARP LBA PWL for Seg 2. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[2] = 0x1D9; // ISHARP LBA for Seg 2. SLOPE value in S5.3 format = -39 + // ISHARP_LBA_PWL_SEG3: ISHARP LBA PWL Segment 3 + dscl_prog_data->isharp_lba.in_seg[3] = 1023; // ISHARP LBA PWL for Seg 3.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[3] = 0; // ISHARP LBA PWL for Seg 3. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[3] = 0; // ISHARP LBA for Seg 3. SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG4: ISHARP LBA PWL Segment 4 + dscl_prog_data->isharp_lba.in_seg[4] = 1023; // ISHARP LBA PWL for Seg 4.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[4] = 0; // ISHARP LBA PWL for Seg 4. BASE value in U0.6 format + dscl_prog_data->isharp_lba.slope_seg[4] = 0; // ISHARP LBA for Seg 4. 
SLOPE value in S5.3 format + // ISHARP_LBA_PWL_SEG5: ISHARP LBA PWL Segment 5 + dscl_prog_data->isharp_lba.in_seg[5] = 1023; // ISHARP LBA PWL for Seg 5.INPUT value in U0.10 format + dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format } // Program the nldelta soft clip values @@ -1346,62 +1747,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table -#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); -#endif -} -static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, - int vscale_ratio, int hscale_ratio, struct spl_taps taps, - enum spl_pixel_format format) -{ - bool enable_isharp = false; - - if (adp_sharpness.enable == false) - return enable_isharp; // Return if adaptive sharpness is disabled - // Is downscaling ? - if (vscale_ratio > 1 || hscale_ratio > 1) { - // END - No iSHARP support for downscaling - return enable_isharp; - } - // Scaling is up to 1:1 (no scaling) or upscaling - - /* Only apply sharpness to NV12 and not P010 */ - if (format != SPL_PIXEL_FORMAT_420BPP8) - return enable_isharp; - - // LB support horizontal taps 4,6 or vertical taps 3, 4, 6 - if (taps.h_taps == 4 || taps.h_taps == 6 || - taps.v_taps == 3 || taps.v_taps == 4 || taps.v_taps == 6) { - // END - iSHARP supported - enable_isharp = true; - } - return enable_isharp; -} - -static bool spl_choose_lls_policy(enum spl_pixel_format format, - enum spl_transfer_func_type tf_type, - enum spl_transfer_func_predefined tf_predefined_type, - enum linear_light_scaling *lls_pref) -{ - if (spl_is_yuv420(format)) { - *lls_pref = LLS_PREF_NO; - if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) - return true; - } else { /* RGB or YUV444 */ - if (tf_type == SPL_TF_TYPE_PREDEFINED) { - if ((tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG) || - (tf_predefined_type == SPL_TRANSFER_FUNCTION_HLG12)) - *lls_pref = LLS_PREF_NO; - else - *lls_pref = LLS_PREF_YES; - return true; - } else if (tf_type == SPL_TF_TYPE_BYPASS) { - *lls_pref = LLS_PREF_YES; - return true; - } - } - *lls_pref = LLS_PREF_NO; - return false; } /* Calculate scaler parameters */ @@ -1410,67 +1756,75 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool res = false; bool enable_easf_v = false; bool enable_easf_h = false; - bool lls_enable_easf = true; int vratio = 0; int hratio = 0; - const struct spl_scaler_data *data = &spl_out->scl_data; + struct spl_scratch spl_scratch; + struct spl_fixed31_32 isharp_scale_ratio; + enum system_setup setup; + bool enable_isharp = false; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + + memset(&spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch.scl_data.h_active = spl_in->h_active; + spl_scratch.scl_data.v_active = spl_in->v_active; + // All SPL calls /* recout calculation */ /* depends on h_active */ - spl_calculate_recout(spl_in, spl_out); + spl_calculate_recout(spl_in, &spl_scratch, spl_out); /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, spl_out); + spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, spl_out); + spl_calculate_viewport_size(spl_in, &spl_scratch); res = spl_get_optimal_number_of_taps( spl_in->basic_out.max_downscale_src_width, spl_in, - spl_out, &spl_in->scaling_quality); + &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, + 
&enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario */ if (res) - spl_calculate_inits_and_viewports(spl_in, spl_out); + spl_calculate_inits_and_viewports(spl_in, &spl_scratch); // Handle 3d recout - spl_handle_3d_recout(spl_in, &spl_out->scl_data.recout); + spl_handle_3d_recout(spl_in, &spl_scratch.scl_data.recout); // Clamp - spl_clamp_viewport(&spl_out->scl_data.viewport); + spl_clamp_viewport(&spl_scratch.scl_data.viewport); + + // Save all calculated parameters in dscl_prog_data structure to program hw registers + spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); if (!res) return res; - /* - * If lls_pref is LLS_PREF_DONT_CARE, then use pixel format and transfer - * function to determine whether to use LINEAR or NONLINEAR scaling - */ - if (spl_in->lls_pref == LLS_PREF_DONT_CARE) - lls_enable_easf = spl_choose_lls_policy(spl_in->basic_in.format, - spl_in->basic_in.tf_type, spl_in->basic_in.tf_predefined_type, - &spl_in->lls_pref); - - // Save all calculated parameters in dscl_prog_data structure to program hw registers - spl_set_dscl_prog_data(spl_in, spl_out); - - vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); - if (!lls_enable_easf || spl_in->disable_easf) { - enable_easf_v = false; - enable_easf_h = false; + if (spl_in->lls_pref == LLS_PREF_YES) { + if (spl_in->is_hdr_on) + setup = HDR_L; + else + setup = SDR_L; } else { - /* Enable EASF on vertical? */ - enable_easf_v = enable_easf(vratio, spl_out->scl_data.taps.v_taps, spl_in->lls_pref, spl_in->prefer_easf); - /* Enable EASF on horizontal? */ - enable_easf_h = enable_easf(hratio, spl_out->scl_data.taps.h_taps, spl_in->lls_pref, spl_in->prefer_easf); + if (spl_in->is_hdr_on) + setup = HDR_NL; + else + setup = SDR_NL; } + // Set EASF - spl_set_easf_data(spl_out->dscl_prog_data, enable_easf_v, enable_easf_h, spl_in->lls_pref, - spl_in->basic_in.format); + spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref, + spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits); + // Set iSHARP - bool enable_isharp = spl_get_isharp_en(spl_in->adaptive_sharpness, vratio, hratio, - spl_out->scl_data.taps, spl_in->basic_in.format); + vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert); + hratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.horz); + if (vratio <= hratio) + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.vert; + else + isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz; + spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp, - spl_in->lls_pref, spl_in->basic_in.format, data); + spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup, + spl_in->debug.scale_to_sharpness_policy); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c new file mode 100644 index 0000000000000..99238644e0a11 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
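+
+/*
+ * Illustrative note (editor's sketch, not part of the original change):
+ * convert_filter_s1_10_to_s1_12() below multiplies each coefficient by 4,
+ * which moves the binary point two fractional bits to the left while
+ * preserving the represented value. Assuming the 12-bit S1.10 encodings
+ * used by these tables widen to 14-bit S1.12: 0x0200 (0.5 in S1.10)
+ * becomes 0x0800 (0.5 in S1.12), and the two's-complement entry 0x0FFF
+ * (-1/1024 in S1.10) becomes 0x3FFC (-4/4096 = -1/1024 in S1.12).
+ */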
+ +#include "dc_spl_filters.h" + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps) +{ + int num_entries = NUM_PHASES_COEFF * num_taps; + int i; + + for (i = 0; i < num_entries; i++) + *(s1_12_filter + i) = *(s1_10_filter + i) * 4; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h new file mode 100644 index 0000000000000..20439cdbdb105 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_filters.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_FILTERS_H__ +#define __DC_SPL_FILTERS_H__ + +#include "dc_spl_types.h" + +#define NUM_PHASES_COEFF 33 + +void convert_filter_s1_10_to_s1_12(const uint16_t *s1_10_filter, + uint16_t *s1_12_filter, int num_taps); + +#endif /* __DC_SPL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index a5d9a6223d061..e0572252c6404 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,6 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include "spl_debug.h" +#include "dc_spl_filters.h" #include "dc_spl_isharp_filters.h" //======================================== @@ -15,7 +17,7 @@ // C_start = 40.000000 // C_end = 64.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_0[32] = { +static const uint32_t filter_isharp_1D_lut_0[ISHARP_LUT_TABLE_SIZE] = { 0x02010000, 0x0A070503, 0x1614100D, @@ -61,7 +63,7 @@ static const uint32_t filter_isharp_1D_lut_0[32] = { // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_0p5x[32] = { +static const uint32_t filter_isharp_1D_lut_0p5x[ISHARP_LUT_TABLE_SIZE] = { 0x00000000, 0x02020101, 0x06050403, @@ -106,7 +108,7 @@ static const uint32_t filter_isharp_1D_lut_0p5x[32] = { // C_start = 96.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_1p0x[32] = { +static const uint32_t filter_isharp_1D_lut_1p0x[ISHARP_LUT_TABLE_SIZE] = { 0x01000000, 0x05040302, 0x0B0A0806, @@ -151,7 +153,7 @@ static const uint32_t filter_isharp_1D_lut_1p0x[32] = { // C_start = 96.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_1p5x[32] = { +static const uint32_t filter_isharp_1D_lut_1p5x[ISHARP_LUT_TABLE_SIZE] = { 0x01010000, 0x07050402, 0x110F0C0A, @@ -196,7 +198,7 @@ static const uint32_t filter_isharp_1D_lut_1p5x[32] = { // C_start = 40.000000 // C_end = 127.000000 //======================================== -static const uint32_t filter_isharp_1D_lut_2p0x[32] = { +static const uint32_t filter_isharp_1D_lut_2p0x[ISHARP_LUT_TABLE_SIZE] = { 0x02010000, 0x0A070503, 0x1614100D, @@ -230,6 +232,53 @@ static const uint32_t filter_isharp_1D_lut_2p0x[32] = { 0x080B0D0E, 0x00020406, }; +//======================================== +// Delta Gain 1DLUT +// LUT content is packed as 4-bytes into one DWORD/entry +// A_start = 0.000000 +// A_end = 10.000000 +// A_gain = 3.000000 +// B_start = 11.000000 +// B_end = 127.000000 +// C_start = 40.000000 +// C_end = 127.000000 +//======================================== +static const uint32_t filter_isharp_1D_lut_3p0x[ISHARP_LUT_TABLE_SIZE] = { +0x03010000, +0x0F0B0805, +0x211E1813, +0x2B292624, 
+0x3533302E, +0x3E3C3A37, +0x46444240, +0x4D4B4A48, +0x5352504F, +0x59575655, +0x5D5C5B5A, +0x61605F5E, +0x64646362, +0x66666565, +0x68686767, +0x68686868, +0x68686868, +0x67676868, +0x65656666, +0x62636464, +0x5E5F6061, +0x5A5B5C5D, +0x55565759, +0x4F505253, +0x484A4B4D, +0x40424446, +0x373A3C3E, +0x2E303335, +0x2426292B, +0x191B1E21, +0x0D101316, +0x0003060A, +}; + +//======================================== // Wide scaler coefficients //======================================================== // gen_scaler_coeffs.m @@ -284,7 +333,7 @@ static const uint16_t filter_isharp_wide_6tap_64p[198] = { // Blur & Scale LPF // S1.10 //======================================================== -static const uint16_t filter_isharp_bs_4tap_64p[198] = { +static const uint16_t filter_isharp_bs_4tap_in_6_64p[198] = { 0x0000, 0x00E5, 0x0237, 0x00E4, 0x0000, 0x0000, 0x0000, 0x00DE, 0x0237, 0x00EB, 0x0000, 0x0000, 0x0000, 0x00D7, 0x0236, 0x00F2, 0x0001, 0x0000, @@ -319,6 +368,147 @@ static const uint16_t filter_isharp_bs_4tap_64p[198] = { 0x0000, 0x003B, 0x01CF, 0x01C2, 0x0034, 0x0000, 0x0000, 0x0037, 0x01C9, 0x01C9, 0x0037, 0x0000 }; +//======================================================== +// gen_BlurScale_coeffs.m +// 25-Apr-2022 +// 4 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_4tap_64p[132] = { +0x00E5, 0x0237, 0x00E4, 0x0000, +0x00DE, 0x0237, 0x00EB, 0x0000, +0x00D7, 0x0236, 0x00F2, 0x0001, +0x00D0, 0x0235, 0x00FA, 0x0001, +0x00C9, 0x0234, 0x0101, 0x0002, +0x00C2, 0x0233, 0x0108, 0x0003, +0x00BB, 0x0232, 0x0110, 0x0003, +0x00B5, 0x0230, 0x0117, 0x0004, +0x00AE, 0x022E, 0x011F, 0x0005, +0x00A8, 0x022C, 0x0126, 0x0006, +0x00A2, 0x022A, 0x012D, 0x0007, +0x009C, 0x0228, 0x0134, 0x0008, +0x0096, 0x0225, 0x013C, 0x0009, +0x0090, 0x0222, 0x0143, 0x000B, +0x008A, 0x021F, 0x014B, 0x000C, +0x0085, 0x021C, 0x0151, 0x000E, +0x007F, 0x0218, 0x015A, 0x000F, +0x007A, 0x0215, 0x0160, 0x0011, +0x0074, 0x0211, 0x0168, 0x0013, +0x006F, 0x020D, 0x016F, 0x0015, +0x006A, 0x0209, 0x0176, 0x0017, +0x0065, 0x0204, 0x017E, 0x0019, +0x0060, 0x0200, 0x0185, 0x001B, +0x005C, 0x01FB, 0x018C, 0x001D, +0x0057, 0x01F6, 0x0193, 0x0020, +0x0053, 0x01F1, 0x019A, 0x0022, +0x004E, 0x01EC, 0x01A1, 0x0025, +0x004A, 0x01E6, 0x01A8, 0x0028, +0x0046, 0x01E1, 0x01AF, 0x002A, +0x0042, 0x01DB, 0x01B6, 0x002D, +0x003F, 0x01D5, 0x01BB, 0x0031, +0x003B, 0x01CF, 0x01C2, 0x0034, +0x0037, 0x01C9, 0x01C9, 0x0037, +}; +//======================================================== +// gen_BlurScale_coeffs.m +// 09-Jun-2022 +// 3 +// 64 +// Blur & Scale LPF +// S1.10 +//======================================================== +static const uint16_t filter_isharp_bs_3tap_64p[99] = { +0x0200, 0x0200, 0x0000, +0x01F6, 0x0206, 0x0004, +0x01EC, 0x020B, 0x0009, +0x01E2, 0x0211, 0x000D, +0x01D8, 0x0216, 0x0012, +0x01CE, 0x021C, 0x0016, +0x01C4, 0x0221, 0x001B, +0x01BA, 0x0226, 0x0020, +0x01B0, 0x022A, 0x0026, +0x01A6, 0x022F, 0x002B, +0x019C, 0x0233, 0x0031, +0x0192, 0x0238, 0x0036, +0x0188, 0x023C, 0x003C, +0x017E, 0x0240, 0x0042, +0x0174, 0x0244, 0x0048, +0x016A, 0x0248, 0x004E, +0x0161, 0x024A, 0x0055, +0x0157, 0x024E, 0x005B, +0x014D, 0x0251, 0x0062, +0x0144, 0x0253, 0x0069, +0x013A, 0x0256, 0x0070, +0x0131, 0x0258, 0x0077, +0x0127, 0x025B, 0x007E, +0x011E, 0x025C, 0x0086, +0x0115, 0x025E, 0x008D, +0x010B, 0x0260, 0x0095, +0x0102, 0x0262, 0x009C, +0x00F9, 0x0263, 0x00A4, +0x00F0, 0x0264, 0x00AC, +0x00E7, 0x0265, 0x00B4, +0x00DF, 0x0264, 0x00BD, +0x00D6, 0x0265, 0x00C5, 
+0x00CD, 0x0266, 0x00CD, +}; + +/* Converted Blur & Scale coeff tables from S1.10 to S1.12 */ +static uint16_t filter_isharp_bs_4tap_in_6_64p_s1_12[198]; +static uint16_t filter_isharp_bs_4tap_64p_s1_12[132]; +static uint16_t filter_isharp_bs_3tap_64p_s1_12[99]; + +/* Pre-generated 1DLUT for given setup and sharpness level */ +struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] = { + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + 0, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + } + }, +}; + +struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = { + {1125, 1000, 0}, + {11, 10, 1}, + {1075, 1000, 2}, + {105, 100, 3}, + {1025, 1000, 4}, + {1, 1, 5}, +}; + const uint32_t *spl_get_filter_isharp_1D_lut_0(void) { return filter_isharp_1D_lut_0; @@ -339,11 +529,229 @@ const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void) { return filter_isharp_1D_lut_2p0x; } +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void) +{ + return filter_isharp_1D_lut_3p0x; +} const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void) { return filter_isharp_wide_6tap_64p; } -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void) +{ + return filter_isharp_bs_4tap_in_6_64p_s1_12; +} +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void) { - return filter_isharp_bs_4tap_64p; + return filter_isharp_bs_4tap_64p_s1_12; } +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void) +{ + return filter_isharp_bs_3tap_64p_s1_12; +} + +static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio) +{ + int j; + struct spl_fixed31_32 ratio_level; + struct scale_ratio_to_sharpness_level_adj *lookup_ptr; + unsigned int sharpness_level_down_adj; + + /* + * Adjust sharpness level based on current scaling ratio + * + * We have 5 discrete scaling ratios which we will use to adjust the + * sharpness level down by 1 as we pass each ratio. The ratios + * are + * + * 1.125 upscale and higher - no adj + * 1.100 - under 1.125 - adj level down 1 + * 1.075 - under 1.100 - adj level down 2 + * 1.050 - under 1.075 - adj level down 3 + * 1.025 - under 1.050 - adj level down 4 + * 1.000 - under 1.025 - adj level down 5 + * + */ + j = 0; + sharpness_level_down_adj = 0; + lookup_ptr = sharpness_level_adj; + while (j < NUM_SHARPNESS_ADJ_LEVELS) { + ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer, + lookup_ptr->ratio_denom); + if (ratio.value >= ratio_level.value) { + sharpness_level_down_adj = lookup_ptr->level_down_adj; + break; + } + lookup_ptr++; + j++; + } + return sharpness_level_down_adj; +} + +static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio, + int discrete_sharpness_level, enum system_setup setup, + struct spl_sharpness_range sharpness_range, + enum scale_to_sharpness_policy scale_to_sharpness_policy) +{ + unsigned int sharpness_level = 0; + unsigned int sharpness_level_down_adj = 0; + + int min_sharpness, max_sharpness, mid_sharpness; + + /* + * Adjust sharpness level if policy requires we adjust it based on + * scale ratio. 
Based on scale ratio, we may adjust the sharpness + * level down by a certain number of steps. We will not select + * a sharpness value of 0, so the lowest sharpness level will be + * 0 or 1 depending on what min_sharpness is (e.g. a 1.06:1 scale + * falls in the 1.050 band above, so the requested level is reduced + * by 3 steps). + * + * If the policy is not required, this code may be removed at a + * later date + */ + switch (setup) { + + case HDR_L: + min_sharpness = sharpness_range.hdr_rgb_min; + max_sharpness = sharpness_range.hdr_rgb_max; + mid_sharpness = sharpness_range.hdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); + break; + case HDR_NL: + /* currently no use case, use Non-linear SDR values for now */ + case SDR_NL: + min_sharpness = sharpness_range.sdr_yuv_min; + max_sharpness = sharpness_range.sdr_yuv_max; + mid_sharpness = sharpness_range.sdr_yuv_mid; + if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); + break; + case SDR_L: + default: + min_sharpness = sharpness_range.sdr_rgb_min; + max_sharpness = sharpness_range.sdr_rgb_max; + mid_sharpness = sharpness_range.sdr_rgb_mid; + if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL) + sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio); + break; + } + + if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level)) + discrete_sharpness_level = 1; + else if (sharpness_level_down_adj >= discrete_sharpness_level) + discrete_sharpness_level = 0; + else + discrete_sharpness_level -= sharpness_level_down_adj; + + int lower_half_step_size = (mid_sharpness - min_sharpness) / 5; + int upper_half_step_size = (max_sharpness - mid_sharpness) / 5; + + // lower half linear approximation + if (discrete_sharpness_level < 5) + sharpness_level = min_sharpness + (lower_half_step_size * discrete_sharpness_level); + // upper half linear approximation + else + sharpness_level = mid_sharpness + (upper_half_step_size * (discrete_sharpness_level - 5)); + + return sharpness_level; +} + +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy) +{ + uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst; + struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level; + int j; + int size_1dlut; + int sharp_calc_int; + uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE]; + + /* Custom sharpnessX1000 value */ + unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio, + sharpness.sharpness_level, setup, + sharpness.sharpness_range, scale_to_sharpness_policy); + sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000); + + /* + * Check if pregen 1dlut table is already precalculated + * If numer/denom is different, then recalculate + */ + if ((filter_isharp_1D_lut_pregen[setup].sharpness_numer == sharpnessX1000) && + (filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000)) + return; + + /* + * Calculate LUT_128_gained with this equation: + * + * LUT_128_gained[i] = (uint8)(0.5 + min(255,(double)(LUT_128[i])*sharpLevel/iGain)) + * where LUT_128[i] is contents of 3p0x isharp 1dlut + * where sharpLevel is desired sharpness level + * where iGain is base sharpness level 3.0 + * where LUT_128_gained[i] is adjusted 1dlut value based on desired sharpness level + * e.g. LUT_128[i] = 104 at sharpLevel = 1.5: min(255, 104*1.5/3.0) = 52.0, + * so the stored entry becomes (uint8)(0.5 + 52.0) = 52 + */ + byte_ptr_1dlut_src = (uint8_t *)filter_isharp_1D_lut_3p0x; + byte_ptr_1dlut_dst = (uint8_t *)filter_pregen_store; + size_1dlut =
sizeof(filter_isharp_1D_lut_3p0x); + memset(byte_ptr_1dlut_dst, 0, size_1dlut); + for (j = 0; j < size_1dlut; j++) { + sharp_base = spl_fixpt_from_int((int)*byte_ptr_1dlut_src); + sharp_calc = spl_fixpt_mul(sharp_base, sharp_level); + sharp_calc = spl_fixpt_div(sharp_calc, spl_fixpt_from_int(3)); + sharp_calc = spl_fixpt_min(spl_fixpt_from_int(255), sharp_calc); + sharp_calc = spl_fixpt_add(sharp_calc, spl_fixpt_from_fraction(1, 2)); + sharp_calc_int = spl_fixpt_floor(sharp_calc); + /* Clamp it at 0x7F so it doesn't wrap */ + if (sharp_calc_int > 127) + sharp_calc_int = 127; + *byte_ptr_1dlut_dst = (uint8_t)sharp_calc_int; + + byte_ptr_1dlut_src++; + byte_ptr_1dlut_dst++; + } + + /* Update 1dlut table and sharpness level */ + memcpy((void *)filter_isharp_1D_lut_pregen[setup].value, (void *)filter_pregen_store, size_1dlut); + filter_isharp_1D_lut_pregen[setup].sharpness_numer = sharpnessX1000; + filter_isharp_1D_lut_pregen[setup].sharpness_denom = 1000; +} + +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup) +{ + return filter_isharp_1D_lut_pregen[setup].value; +} + +void spl_init_blur_scale_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(filter_isharp_bs_3tap_64p, + filter_isharp_bs_3tap_64p_s1_12, 3); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_64p, + filter_isharp_bs_4tap_64p_s1_12, 4); + convert_filter_s1_10_to_s1_12(filter_isharp_bs_4tap_in_6_64p, + filter_isharp_bs_4tap_in_6_64p_s1_12, 6); +} + +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) +{ + if (taps == 3) + return spl_get_filter_isharp_bs_3tap_64p(); + else if (taps == 4) + return spl_get_filter_isharp_bs_4tap_64p(); + else if (taps == 6) + return spl_get_filter_isharp_bs_4tap_in_6_64p(); + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data) +{ + dscl_prog_data->filter_blur_scale_h = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.h_taps); + + dscl_prog_data->filter_blur_scale_v = + spl_dscl_get_blur_scale_coeffs_64p(data->taps.v_taps); +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index 1aaf4c50c1bc2..89af91e19b6ce 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -12,6 +12,39 @@ const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_2p0x(void); -const uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +const uint32_t *spl_get_filter_isharp_1D_lut_3p0x(void); +uint16_t *spl_get_filter_isharp_bs_4tap_in_6_64p(void); +uint16_t *spl_get_filter_isharp_bs_4tap_64p(void); +uint16_t *spl_get_filter_isharp_bs_3tap_64p(void); const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void); +uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps); + +#define NUM_SHARPNESS_ADJ_LEVELS 6 +struct scale_ratio_to_sharpness_level_adj { + unsigned int ratio_numer; + unsigned int ratio_denom; + unsigned int level_down_adj; /* adjust sharpness level down */ +}; + +struct isharp_1D_lut_pregen { + unsigned int sharpness_numer; + unsigned int sharpness_denom; + uint32_t value[ISHARP_LUT_TABLE_SIZE]; +}; + +enum system_setup { + SDR_NL = 0, + SDR_L, + HDR_NL, + HDR_L, + NUM_SHARPNESS_SETUPS +}; + +void spl_init_blur_scale_coeffs(void); 
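+
+/*
+ * Illustrative usage note (editor's sketch, not part of the original
+ * change): the blur & scale getters declared above return the static
+ * *_s1_12 tables, which are only populated by spl_init_blur_scale_coeffs();
+ * it must run once before spl_dscl_get_blur_scale_coeffs_64p() or
+ * spl_set_blur_scale_data() hands coefficients to register programming,
+ * otherwise the returned tables are all zero.
+ */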
+void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data); + +void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, + struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy); +uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup); #endif /* __DC_SPL_ISHARP_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c new file mode 100644 index 0000000000000..09bf82f7d4688 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.c @@ -0,0 +1,1726 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "spl_debug.h" +#include "dc_spl_filters.h" +#include "dc_spl_scl_filters.h" +#include "dc_spl_scl_easf_filters.h" + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.3_p_10qb_ +// 3 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_30[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EC, 0x020B, 0x0009, + 0x01E2, 0x0211, 0x000D, + 0x01D8, 0x0216, 0x0012, + 0x01CE, 0x021C, 0x0016, + 0x01C4, 0x0221, 0x001B, + 0x01BA, 0x0226, 0x0020, + 0x01B0, 0x022A, 0x0026, + 0x01A6, 0x022F, 0x002B, + 0x019C, 0x0233, 0x0031, + 0x0192, 0x0238, 0x0036, + 0x0188, 0x023C, 0x003C, + 0x017E, 0x0240, 0x0042, + 0x0174, 0x0244, 0x0048, + 0x016A, 0x0248, 0x004E, + 0x0161, 0x024A, 0x0055, + 0x0157, 0x024E, 0x005B, + 0x014D, 0x0251, 0x0062, + 0x0144, 0x0253, 0x0069, + 0x013A, 0x0256, 0x0070, + 0x0131, 0x0258, 0x0077, + 0x0127, 0x025B, 0x007E, + 0x011E, 0x025C, 0x0086, + 0x0115, 0x025E, 0x008D, + 0x010B, 0x0260, 0x0095, + 0x0102, 0x0262, 0x009C, + 0x00F9, 0x0263, 0x00A4, + 0x00F0, 0x0264, 0x00AC, + 0x00E7, 0x0265, 0x00B4, + 0x00DF, 0x0264, 0x00BD, + 0x00D6, 0x0265, 0x00C5, + 0x00CD, 0x0266, 0x00CD, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.4_p_10qb_ +// 3 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_40[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F6, 0x0206, 0x0004, + 0x01EB, 0x020E, 0x0007, + 0x01E1, 0x0214, 0x000B, + 0x01D7, 0x021A, 0x000F, + 0x01CD, 0x0220, 0x0013, + 0x01C2, 0x0226, 0x0018, + 0x01B8, 0x022C, 0x001C, + 0x01AE, 0x0231, 0x0021, + 0x01A3, 0x0237, 0x0026, + 0x0199, 0x023C, 0x002B, + 0x018F, 0x0240, 0x0031, + 0x0185, 0x0245, 0x0036, + 0x017A, 0x024A, 0x003C, + 0x0170, 0x024F, 0x0041, + 0x0166, 0x0253, 0x0047, + 0x015C, 0x0257, 0x004D, + 0x0152, 0x025A, 0x0054, + 0x0148, 0x025E, 0x005A, + 0x013E, 0x0261, 0x0061, + 0x0134, 0x0264, 0x0068, + 0x012B, 0x0266, 0x006F, + 0x0121, 0x0269, 0x0076, + 0x0117, 0x026C, 0x007D, + 0x010E, 0x026E, 0x0084, + 0x0104, 0x0270, 0x008C, + 0x00FB, 0x0271, 0x0094, + 0x00F2, 0x0272, 0x009C, + 0x00E9, 0x0273, 0x00A4, + 0x00E0, 0x0274, 0x00AC, + 0x00D7, 0x0275, 0x00B4, + 0x00CE, 0x0275, 0x00BD, + 0x00C5, 0x0276, 0x00C5, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.5_p_10qb_ +// 3 
+// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_50[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F5, 0x0209, 0x0002, + 0x01EA, 0x0211, 0x0005, + 0x01DF, 0x021A, 0x0007, + 0x01D4, 0x0222, 0x000A, + 0x01C9, 0x022A, 0x000D, + 0x01BE, 0x0232, 0x0010, + 0x01B3, 0x0239, 0x0014, + 0x01A8, 0x0241, 0x0017, + 0x019D, 0x0248, 0x001B, + 0x0192, 0x024F, 0x001F, + 0x0187, 0x0255, 0x0024, + 0x017C, 0x025C, 0x0028, + 0x0171, 0x0262, 0x002D, + 0x0166, 0x0268, 0x0032, + 0x015B, 0x026E, 0x0037, + 0x0150, 0x0273, 0x003D, + 0x0146, 0x0278, 0x0042, + 0x013B, 0x027D, 0x0048, + 0x0130, 0x0282, 0x004E, + 0x0126, 0x0286, 0x0054, + 0x011B, 0x028A, 0x005B, + 0x0111, 0x028D, 0x0062, + 0x0107, 0x0290, 0x0069, + 0x00FD, 0x0293, 0x0070, + 0x00F3, 0x0296, 0x0077, + 0x00E9, 0x0298, 0x007F, + 0x00DF, 0x029A, 0x0087, + 0x00D5, 0x029C, 0x008F, + 0x00CC, 0x029D, 0x0097, + 0x00C3, 0x029E, 0x009F, + 0x00BA, 0x029E, 0x00A8, + 0x00B1, 0x029E, 0x00B1, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.6_p_10qb_ +// 3 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_60[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F4, 0x020B, 0x0001, + 0x01E8, 0x0216, 0x0002, + 0x01DC, 0x0221, 0x0003, + 0x01D0, 0x022B, 0x0005, + 0x01C4, 0x0235, 0x0007, + 0x01B8, 0x0240, 0x0008, + 0x01AC, 0x0249, 0x000B, + 0x01A0, 0x0253, 0x000D, + 0x0194, 0x025C, 0x0010, + 0x0188, 0x0265, 0x0013, + 0x017C, 0x026E, 0x0016, + 0x0170, 0x0277, 0x0019, + 0x0164, 0x027F, 0x001D, + 0x0158, 0x0287, 0x0021, + 0x014C, 0x028F, 0x0025, + 0x0140, 0x0297, 0x0029, + 0x0135, 0x029D, 0x002E, + 0x0129, 0x02A4, 0x0033, + 0x011D, 0x02AB, 0x0038, + 0x0112, 0x02B0, 0x003E, + 0x0107, 0x02B5, 0x0044, + 0x00FC, 0x02BA, 0x004A, + 0x00F1, 0x02BF, 0x0050, + 0x00E6, 0x02C3, 0x0057, + 0x00DB, 0x02C7, 0x005E, + 0x00D1, 0x02CA, 0x0065, + 0x00C7, 0x02CC, 0x006D, + 0x00BD, 0x02CE, 0x0075, + 0x00B3, 0x02D0, 0x007D, + 0x00A9, 0x02D2, 0x0085, + 0x00A0, 0x02D2, 0x008E, + 0x0097, 0x02D2, 0x0097, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.7_p_10qb_ +// 3 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_70[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F3, 0x020D, 0x0000, + 0x01E5, 0x021B, 0x0000, + 0x01D8, 0x0228, 0x0000, + 0x01CB, 0x0235, 0x0000, + 0x01BD, 0x0243, 0x0000, + 0x01B0, 0x024F, 0x0001, + 0x01A2, 0x025C, 0x0002, + 0x0195, 0x0268, 0x0003, + 0x0187, 0x0275, 0x0004, + 0x017A, 0x0280, 0x0006, + 0x016D, 0x028C, 0x0007, + 0x015F, 0x0298, 0x0009, + 0x0152, 0x02A2, 0x000C, + 0x0145, 0x02AD, 0x000E, + 0x0138, 0x02B7, 0x0011, + 0x012B, 0x02C0, 0x0015, + 0x011E, 0x02CA, 0x0018, + 0x0111, 0x02D3, 0x001C, + 0x0105, 0x02DB, 0x0020, + 0x00F8, 0x02E3, 0x0025, + 0x00EC, 0x02EA, 0x002A, + 0x00E0, 0x02F1, 0x002F, + 0x00D5, 0x02F6, 0x0035, + 0x00C9, 0x02FC, 0x003B, + 0x00BE, 0x0301, 0x0041, + 0x00B3, 0x0305, 0x0048, + 0x00A8, 0x0309, 0x004F, + 0x009E, 0x030C, 0x0056, + 0x0094, 0x030E, 0x005E, + 0x008A, 0x0310, 0x0066, + 0x0081, 0x0310, 0x006F, + 0x0077, 0x0312, 0x0077, +}; + 
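+/*
+ * Illustrative note (editor's sketch, not part of the original change):
+ * each 3-tap table stores 33 phases (0..32) of a 64-phase filter, three
+ * S1.10 coefficients per phase, and every phase sums to 0x400 (1.0) so
+ * unity DC gain is preserved; e.g. phase 32 of the 0.7 ratio table above
+ * is 0x0077 + 0x0312 + 0x0077 = 0x0400. The remaining phases are
+ * presumably obtained by mirroring phases 0..31, as is typical for
+ * 64-phase filter storage.
+ */
+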
+//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.8_p_10qb_ +// 3 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_80[99] = { + 0x0200, 0x0200, 0x0000, + 0x01F1, 0x0210, 0x0FFF, + 0x01E2, 0x0220, 0x0FFE, + 0x01D2, 0x0232, 0x0FFC, + 0x01C3, 0x0241, 0x0FFC, + 0x01B4, 0x0251, 0x0FFB, + 0x01A4, 0x0262, 0x0FFA, + 0x0195, 0x0271, 0x0FFA, + 0x0186, 0x0281, 0x0FF9, + 0x0176, 0x0291, 0x0FF9, + 0x0167, 0x02A0, 0x0FF9, + 0x0158, 0x02AE, 0x0FFA, + 0x0149, 0x02BD, 0x0FFA, + 0x013A, 0x02CB, 0x0FFB, + 0x012C, 0x02D7, 0x0FFD, + 0x011D, 0x02E5, 0x0FFE, + 0x010F, 0x02F1, 0x0000, + 0x0101, 0x02FD, 0x0002, + 0x00F3, 0x0308, 0x0005, + 0x00E5, 0x0313, 0x0008, + 0x00D8, 0x031D, 0x000B, + 0x00CB, 0x0326, 0x000F, + 0x00BE, 0x032F, 0x0013, + 0x00B2, 0x0337, 0x0017, + 0x00A6, 0x033E, 0x001C, + 0x009A, 0x0345, 0x0021, + 0x008F, 0x034A, 0x0027, + 0x0084, 0x034F, 0x002D, + 0x0079, 0x0353, 0x0034, + 0x006F, 0x0356, 0x003B, + 0x0065, 0x0358, 0x0043, + 0x005C, 0x0359, 0x004B, + 0x0053, 0x035A, 0x0053, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_0.9_p_10qb_ +// 3 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_0_90[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EE, 0x0214, 0x0FFE, + 0x01DC, 0x0228, 0x0FFC, + 0x01CA, 0x023C, 0x0FFA, + 0x01B9, 0x024F, 0x0FF8, + 0x01A7, 0x0262, 0x0FF7, + 0x0195, 0x0276, 0x0FF5, + 0x0183, 0x028A, 0x0FF3, + 0x0172, 0x029C, 0x0FF2, + 0x0160, 0x02AF, 0x0FF1, + 0x014F, 0x02C2, 0x0FEF, + 0x013E, 0x02D4, 0x0FEE, + 0x012D, 0x02E5, 0x0FEE, + 0x011C, 0x02F7, 0x0FED, + 0x010C, 0x0307, 0x0FED, + 0x00FB, 0x0318, 0x0FED, + 0x00EC, 0x0327, 0x0FED, + 0x00DC, 0x0336, 0x0FEE, + 0x00CD, 0x0344, 0x0FEF, + 0x00BE, 0x0352, 0x0FF0, + 0x00B0, 0x035E, 0x0FF2, + 0x00A2, 0x036A, 0x0FF4, + 0x0095, 0x0375, 0x0FF6, + 0x0088, 0x037F, 0x0FF9, + 0x007B, 0x0388, 0x0FFD, + 0x006F, 0x0391, 0x0000, + 0x0064, 0x0397, 0x0005, + 0x0059, 0x039D, 0x000A, + 0x004E, 0x03A3, 0x000F, + 0x0045, 0x03A6, 0x0015, + 0x003B, 0x03A9, 0x001C, + 0x0033, 0x03AA, 0x0023, + 0x002A, 0x03AC, 0x002A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 3t_64p_LanczosEd_p_1_p_10qb_ +// 3 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_3tap_64p_ratio_1_00[99] = { + 0x0200, 0x0200, 0x0000, + 0x01EB, 0x0217, 0x0FFE, + 0x01D5, 0x022F, 0x0FFC, + 0x01C0, 0x0247, 0x0FF9, + 0x01AB, 0x025E, 0x0FF7, + 0x0196, 0x0276, 0x0FF4, + 0x0181, 0x028D, 0x0FF2, + 0x016C, 0x02A5, 0x0FEF, + 0x0158, 0x02BB, 0x0FED, + 0x0144, 0x02D1, 0x0FEB, + 0x0130, 0x02E8, 0x0FE8, + 0x011C, 0x02FE, 0x0FE6, + 0x0109, 0x0313, 0x0FE4, + 0x00F6, 0x0328, 0x0FE2, + 0x00E4, 0x033C, 0x0FE0, + 0x00D2, 0x034F, 0x0FDF, + 0x00C0, 0x0363, 0x0FDD, + 0x00B0, 0x0374, 0x0FDC, + 0x009F, 0x0385, 0x0FDC, + 0x0090, 0x0395, 0x0FDB, + 0x0081, 0x03A4, 0x0FDB, + 0x0072, 0x03B3, 0x0FDB, + 0x0064, 0x03C0, 0x0FDC, + 0x0057, 0x03CC, 0x0FDD, + 0x004B, 0x03D6, 0x0FDF, + 0x003F, 0x03E0, 0x0FE1, + 0x0034, 0x03E8, 0x0FE4, + 0x002A, 0x03EF, 0x0FE7, + 0x0020, 0x03F5, 
0x0FEB, + 0x0017, 0x03FA, 0x0FEF, + 0x000F, 0x03FD, 0x0FF4, + 0x0007, 0x03FF, 0x0FFA, + 0x0000, 0x0400, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.3_p_10qb_ +// 4 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_30[132] = { + 0x0104, 0x01F8, 0x0104, 0x0000, + 0x00FE, 0x01F7, 0x010A, 0x0001, + 0x00F8, 0x01F6, 0x010F, 0x0003, + 0x00F2, 0x01F5, 0x0114, 0x0005, + 0x00EB, 0x01F4, 0x011B, 0x0006, + 0x00E5, 0x01F3, 0x0120, 0x0008, + 0x00DF, 0x01F2, 0x0125, 0x000A, + 0x00DA, 0x01F0, 0x012A, 0x000C, + 0x00D4, 0x01EE, 0x0130, 0x000E, + 0x00CE, 0x01ED, 0x0135, 0x0010, + 0x00C8, 0x01EB, 0x013A, 0x0013, + 0x00C2, 0x01E9, 0x0140, 0x0015, + 0x00BD, 0x01E7, 0x0145, 0x0017, + 0x00B7, 0x01E5, 0x014A, 0x001A, + 0x00B1, 0x01E2, 0x0151, 0x001C, + 0x00AC, 0x01E0, 0x0155, 0x001F, + 0x00A7, 0x01DD, 0x015A, 0x0022, + 0x00A1, 0x01DB, 0x015F, 0x0025, + 0x009C, 0x01D8, 0x0165, 0x0027, + 0x0097, 0x01D5, 0x016A, 0x002A, + 0x0092, 0x01D2, 0x016E, 0x002E, + 0x008C, 0x01CF, 0x0174, 0x0031, + 0x0087, 0x01CC, 0x0179, 0x0034, + 0x0083, 0x01C9, 0x017D, 0x0037, + 0x007E, 0x01C5, 0x0182, 0x003B, + 0x0079, 0x01C2, 0x0187, 0x003E, + 0x0074, 0x01BE, 0x018C, 0x0042, + 0x0070, 0x01BA, 0x0190, 0x0046, + 0x006B, 0x01B7, 0x0195, 0x0049, + 0x0066, 0x01B3, 0x019A, 0x004D, + 0x0062, 0x01AF, 0x019E, 0x0051, + 0x005E, 0x01AB, 0x01A2, 0x0055, + 0x005A, 0x01A6, 0x01A6, 0x005A, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.4_p_10qb_ +// 4 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_40[132] = { + 0x00FB, 0x0209, 0x00FC, 0x0000, + 0x00F5, 0x0209, 0x0101, 0x0001, + 0x00EE, 0x0208, 0x0108, 0x0002, + 0x00E8, 0x0207, 0x010E, 0x0003, + 0x00E2, 0x0206, 0x0114, 0x0004, + 0x00DB, 0x0205, 0x011A, 0x0006, + 0x00D5, 0x0204, 0x0120, 0x0007, + 0x00CF, 0x0203, 0x0125, 0x0009, + 0x00C9, 0x0201, 0x012C, 0x000A, + 0x00C3, 0x01FF, 0x0132, 0x000C, + 0x00BD, 0x01FD, 0x0138, 0x000E, + 0x00B7, 0x01FB, 0x013E, 0x0010, + 0x00B1, 0x01F9, 0x0144, 0x0012, + 0x00AC, 0x01F7, 0x0149, 0x0014, + 0x00A6, 0x01F4, 0x0150, 0x0016, + 0x00A0, 0x01F2, 0x0156, 0x0018, + 0x009B, 0x01EF, 0x015C, 0x001A, + 0x0095, 0x01EC, 0x0162, 0x001D, + 0x0090, 0x01E9, 0x0168, 0x001F, + 0x008B, 0x01E6, 0x016D, 0x0022, + 0x0085, 0x01E3, 0x0173, 0x0025, + 0x0080, 0x01DF, 0x0179, 0x0028, + 0x007B, 0x01DC, 0x017E, 0x002B, + 0x0076, 0x01D8, 0x0184, 0x002E, + 0x0071, 0x01D4, 0x018A, 0x0031, + 0x006D, 0x01D1, 0x018E, 0x0034, + 0x0068, 0x01CD, 0x0193, 0x0038, + 0x0063, 0x01C8, 0x019A, 0x003B, + 0x005F, 0x01C4, 0x019E, 0x003F, + 0x005B, 0x01C0, 0x01A3, 0x0042, + 0x0056, 0x01BB, 0x01A9, 0x0046, + 0x0052, 0x01B7, 0x01AD, 0x004A, + 0x004E, 0x01B2, 0x01B2, 0x004E, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.5_p_10qb_ +// 4 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_50[132] = { + 0x00E5, 0x0236, 0x00E5, 0x0000, + 0x00DE, 0x0235, 0x00ED, 0x0000, + 0x00D7, 0x0235, 0x00F4, 
0x0000, + 0x00D0, 0x0235, 0x00FB, 0x0000, + 0x00C9, 0x0234, 0x0102, 0x0001, + 0x00C2, 0x0233, 0x010A, 0x0001, + 0x00BC, 0x0232, 0x0111, 0x0001, + 0x00B5, 0x0230, 0x0119, 0x0002, + 0x00AE, 0x022F, 0x0121, 0x0002, + 0x00A8, 0x022D, 0x0128, 0x0003, + 0x00A2, 0x022B, 0x012F, 0x0004, + 0x009B, 0x0229, 0x0137, 0x0005, + 0x0095, 0x0226, 0x013F, 0x0006, + 0x008F, 0x0224, 0x0146, 0x0007, + 0x0089, 0x0221, 0x014E, 0x0008, + 0x0083, 0x021E, 0x0155, 0x000A, + 0x007E, 0x021B, 0x015C, 0x000B, + 0x0078, 0x0217, 0x0164, 0x000D, + 0x0072, 0x0213, 0x016D, 0x000E, + 0x006D, 0x0210, 0x0173, 0x0010, + 0x0068, 0x020C, 0x017A, 0x0012, + 0x0063, 0x0207, 0x0182, 0x0014, + 0x005E, 0x0203, 0x0189, 0x0016, + 0x0059, 0x01FE, 0x0191, 0x0018, + 0x0054, 0x01F9, 0x0198, 0x001B, + 0x0050, 0x01F4, 0x019F, 0x001D, + 0x004B, 0x01EF, 0x01A6, 0x0020, + 0x0047, 0x01EA, 0x01AC, 0x0023, + 0x0043, 0x01E4, 0x01B3, 0x0026, + 0x003F, 0x01DF, 0x01B9, 0x0029, + 0x003B, 0x01D9, 0x01C0, 0x002C, + 0x0037, 0x01D3, 0x01C6, 0x0030, + 0x0033, 0x01CD, 0x01CD, 0x0033, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.6_p_10qb_ +// 4 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_60[132] = { + 0x00C8, 0x026F, 0x00C9, 0x0000, + 0x00C0, 0x0270, 0x00D1, 0x0FFF, + 0x00B8, 0x0270, 0x00D9, 0x0FFF, + 0x00B1, 0x0270, 0x00E1, 0x0FFE, + 0x00A9, 0x026F, 0x00EB, 0x0FFD, + 0x00A2, 0x026E, 0x00F3, 0x0FFD, + 0x009A, 0x026D, 0x00FD, 0x0FFC, + 0x0093, 0x026C, 0x0105, 0x0FFC, + 0x008C, 0x026A, 0x010F, 0x0FFB, + 0x0085, 0x0268, 0x0118, 0x0FFB, + 0x007E, 0x0265, 0x0122, 0x0FFB, + 0x0078, 0x0263, 0x012A, 0x0FFB, + 0x0071, 0x0260, 0x0134, 0x0FFB, + 0x006B, 0x025C, 0x013E, 0x0FFB, + 0x0065, 0x0259, 0x0147, 0x0FFB, + 0x005F, 0x0255, 0x0151, 0x0FFB, + 0x0059, 0x0251, 0x015A, 0x0FFC, + 0x0054, 0x024D, 0x0163, 0x0FFC, + 0x004E, 0x0248, 0x016D, 0x0FFD, + 0x0049, 0x0243, 0x0176, 0x0FFE, + 0x0044, 0x023E, 0x017F, 0x0FFF, + 0x003F, 0x0238, 0x0189, 0x0000, + 0x003A, 0x0232, 0x0193, 0x0001, + 0x0036, 0x022C, 0x019C, 0x0002, + 0x0031, 0x0226, 0x01A5, 0x0004, + 0x002D, 0x021F, 0x01AF, 0x0005, + 0x0029, 0x0218, 0x01B8, 0x0007, + 0x0025, 0x0211, 0x01C1, 0x0009, + 0x0022, 0x020A, 0x01C9, 0x000B, + 0x001E, 0x0203, 0x01D2, 0x000D, + 0x001B, 0x01FB, 0x01DA, 0x0010, + 0x0018, 0x01F3, 0x01E3, 0x0012, + 0x0015, 0x01EB, 0x01EB, 0x0015, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.7_p_10qb_ +// 4 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_70[132] = { + 0x00A3, 0x02B9, 0x00A4, 0x0000, + 0x009A, 0x02BA, 0x00AD, 0x0FFF, + 0x0092, 0x02BA, 0x00B6, 0x0FFE, + 0x0089, 0x02BA, 0x00C1, 0x0FFC, + 0x0081, 0x02B9, 0x00CB, 0x0FFB, + 0x0079, 0x02B8, 0x00D5, 0x0FFA, + 0x0071, 0x02B7, 0x00DF, 0x0FF9, + 0x0069, 0x02B5, 0x00EA, 0x0FF8, + 0x0062, 0x02B3, 0x00F4, 0x0FF7, + 0x005B, 0x02B0, 0x00FF, 0x0FF6, + 0x0054, 0x02AD, 0x010B, 0x0FF4, + 0x004D, 0x02A9, 0x0117, 0x0FF3, + 0x0046, 0x02A5, 0x0123, 0x0FF2, + 0x0040, 0x02A1, 0x012D, 0x0FF2, + 0x003A, 0x029C, 0x0139, 0x0FF1, + 0x0034, 0x0297, 0x0145, 0x0FF0, + 0x002F, 0x0292, 0x0150, 0x0FEF, + 0x0029, 0x028C, 0x015C, 0x0FEF, + 0x0024, 0x0285, 0x0169, 0x0FEE, + 0x001F, 0x027F, 
0x0174, 0x0FEE, + 0x001B, 0x0278, 0x017F, 0x0FEE, + 0x0016, 0x0270, 0x018D, 0x0FED, + 0x0012, 0x0268, 0x0199, 0x0FED, + 0x000E, 0x0260, 0x01A4, 0x0FEE, + 0x000B, 0x0258, 0x01AF, 0x0FEE, + 0x0007, 0x024F, 0x01BC, 0x0FEE, + 0x0004, 0x0246, 0x01C7, 0x0FEF, + 0x0001, 0x023D, 0x01D3, 0x0FEF, + 0x0FFE, 0x0233, 0x01DF, 0x0FF0, + 0x0FFC, 0x0229, 0x01EA, 0x0FF1, + 0x0FFA, 0x021F, 0x01F4, 0x0FF3, + 0x0FF8, 0x0215, 0x01FF, 0x0FF4, + 0x0FF6, 0x020A, 0x020A, 0x0FF6, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.8_p_10qb_ +// 4 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_80[132] = { + 0x0075, 0x0315, 0x0076, 0x0000, + 0x006C, 0x0316, 0x007F, 0x0FFF, + 0x0062, 0x0316, 0x008A, 0x0FFE, + 0x0059, 0x0315, 0x0096, 0x0FFC, + 0x0050, 0x0314, 0x00A1, 0x0FFB, + 0x0048, 0x0312, 0x00AD, 0x0FF9, + 0x0040, 0x0310, 0x00B8, 0x0FF8, + 0x0038, 0x030D, 0x00C5, 0x0FF6, + 0x0030, 0x030A, 0x00D1, 0x0FF5, + 0x0029, 0x0306, 0x00DE, 0x0FF3, + 0x0022, 0x0301, 0x00EB, 0x0FF2, + 0x001C, 0x02FC, 0x00F8, 0x0FF0, + 0x0015, 0x02F7, 0x0106, 0x0FEE, + 0x0010, 0x02F1, 0x0112, 0x0FED, + 0x000A, 0x02EA, 0x0121, 0x0FEB, + 0x0005, 0x02E3, 0x012F, 0x0FE9, + 0x0000, 0x02DB, 0x013D, 0x0FE8, + 0x0FFB, 0x02D3, 0x014C, 0x0FE6, + 0x0FF7, 0x02CA, 0x015A, 0x0FE5, + 0x0FF3, 0x02C1, 0x0169, 0x0FE3, + 0x0FF0, 0x02B7, 0x0177, 0x0FE2, + 0x0FEC, 0x02AD, 0x0186, 0x0FE1, + 0x0FE9, 0x02A2, 0x0196, 0x0FDF, + 0x0FE7, 0x0297, 0x01A4, 0x0FDE, + 0x0FE4, 0x028C, 0x01B3, 0x0FDD, + 0x0FE2, 0x0280, 0x01C2, 0x0FDC, + 0x0FE0, 0x0274, 0x01D0, 0x0FDC, + 0x0FDF, 0x0268, 0x01DE, 0x0FDB, + 0x0FDD, 0x025B, 0x01EE, 0x0FDA, + 0x0FDC, 0x024E, 0x01FC, 0x0FDA, + 0x0FDB, 0x0241, 0x020A, 0x0FDA, + 0x0FDB, 0x0233, 0x0218, 0x0FDA, + 0x0FDA, 0x0226, 0x0226, 0x0FDA, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_0.9_p_10qb_ +// 4 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_0_90[132] = { + 0x003F, 0x0383, 0x003E, 0x0000, + 0x0034, 0x0383, 0x004A, 0x0FFF, + 0x002B, 0x0383, 0x0054, 0x0FFE, + 0x0021, 0x0381, 0x0061, 0x0FFD, + 0x0019, 0x037F, 0x006C, 0x0FFC, + 0x0010, 0x037C, 0x0079, 0x0FFB, + 0x0008, 0x0378, 0x0086, 0x0FFA, + 0x0001, 0x0374, 0x0093, 0x0FF8, + 0x0FFA, 0x036E, 0x00A1, 0x0FF7, + 0x0FF3, 0x0368, 0x00B0, 0x0FF5, + 0x0FED, 0x0361, 0x00BF, 0x0FF3, + 0x0FE8, 0x035A, 0x00CD, 0x0FF1, + 0x0FE2, 0x0352, 0x00DC, 0x0FF0, + 0x0FDE, 0x0349, 0x00EB, 0x0FEE, + 0x0FD9, 0x033F, 0x00FC, 0x0FEC, + 0x0FD5, 0x0335, 0x010D, 0x0FE9, + 0x0FD2, 0x032A, 0x011D, 0x0FE7, + 0x0FCF, 0x031E, 0x012E, 0x0FE5, + 0x0FCC, 0x0312, 0x013F, 0x0FE3, + 0x0FCA, 0x0305, 0x0150, 0x0FE1, + 0x0FC8, 0x02F8, 0x0162, 0x0FDE, + 0x0FC6, 0x02EA, 0x0174, 0x0FDC, + 0x0FC5, 0x02DC, 0x0185, 0x0FDA, + 0x0FC4, 0x02CD, 0x0197, 0x0FD8, + 0x0FC3, 0x02BE, 0x01AA, 0x0FD5, + 0x0FC3, 0x02AF, 0x01BB, 0x0FD3, + 0x0FC3, 0x029F, 0x01CD, 0x0FD1, + 0x0FC3, 0x028E, 0x01E0, 0x0FCF, + 0x0FC3, 0x027E, 0x01F2, 0x0FCD, + 0x0FC4, 0x026D, 0x0203, 0x0FCC, + 0x0FC5, 0x025C, 0x0215, 0x0FCA, + 0x0FC6, 0x024B, 0x0227, 0x0FC8, + 0x0FC7, 0x0239, 0x0239, 0x0FC7, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// 
make_test_script.m +// 03-Apr-2024 +// 4t_64p_LanczosEd_p_1_p_10qb_ +// 4 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_4tap_64p_ratio_1_00[132] = { + 0x0000, 0x0400, 0x0000, 0x0000, + 0x0FF6, 0x03FF, 0x000B, 0x0000, + 0x0FED, 0x03FE, 0x0015, 0x0000, + 0x0FE4, 0x03FB, 0x0022, 0x0FFF, + 0x0FDC, 0x03F7, 0x002E, 0x0FFF, + 0x0FD5, 0x03F2, 0x003B, 0x0FFE, + 0x0FCE, 0x03EC, 0x0048, 0x0FFE, + 0x0FC8, 0x03E5, 0x0056, 0x0FFD, + 0x0FC3, 0x03DC, 0x0065, 0x0FFC, + 0x0FBE, 0x03D3, 0x0075, 0x0FFA, + 0x0FB9, 0x03C9, 0x0085, 0x0FF9, + 0x0FB6, 0x03BE, 0x0094, 0x0FF8, + 0x0FB2, 0x03B2, 0x00A6, 0x0FF6, + 0x0FB0, 0x03A5, 0x00B7, 0x0FF4, + 0x0FAD, 0x0397, 0x00CA, 0x0FF2, + 0x0FAB, 0x0389, 0x00DC, 0x0FF0, + 0x0FAA, 0x0379, 0x00EF, 0x0FEE, + 0x0FA9, 0x0369, 0x0102, 0x0FEC, + 0x0FA9, 0x0359, 0x0115, 0x0FE9, + 0x0FA9, 0x0348, 0x0129, 0x0FE6, + 0x0FA9, 0x0336, 0x013D, 0x0FE4, + 0x0FA9, 0x0323, 0x0153, 0x0FE1, + 0x0FAA, 0x0310, 0x0168, 0x0FDE, + 0x0FAC, 0x02FD, 0x017C, 0x0FDB, + 0x0FAD, 0x02E9, 0x0192, 0x0FD8, + 0x0FAF, 0x02D5, 0x01A7, 0x0FD5, + 0x0FB1, 0x02C0, 0x01BD, 0x0FD2, + 0x0FB3, 0x02AC, 0x01D2, 0x0FCF, + 0x0FB5, 0x0296, 0x01E9, 0x0FCC, + 0x0FB8, 0x0281, 0x01FE, 0x0FC9, + 0x0FBA, 0x026C, 0x0214, 0x0FC6, + 0x0FBD, 0x0256, 0x022A, 0x0FC3, + 0x0FC0, 0x0240, 0x0240, 0x0FC0, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.3_p_10qb_ +// 6 +// 64 +// input/output = 0.300000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_30[198] = { + 0x004B, 0x0100, 0x0169, 0x0101, 0x004B, 0x0000, + 0x0049, 0x00FD, 0x0169, 0x0103, 0x004E, 0x0000, + 0x0047, 0x00FA, 0x0169, 0x0106, 0x0050, 0x0000, + 0x0045, 0x00F7, 0x0168, 0x0109, 0x0052, 0x0001, + 0x0043, 0x00F5, 0x0168, 0x010B, 0x0054, 0x0001, + 0x0040, 0x00F2, 0x0168, 0x010E, 0x0057, 0x0001, + 0x003E, 0x00EF, 0x0168, 0x0110, 0x0059, 0x0002, + 0x003C, 0x00EC, 0x0167, 0x0113, 0x005C, 0x0002, + 0x003A, 0x00E9, 0x0167, 0x0116, 0x005E, 0x0002, + 0x0038, 0x00E6, 0x0166, 0x0118, 0x0061, 0x0003, + 0x0036, 0x00E3, 0x0165, 0x011C, 0x0063, 0x0003, + 0x0034, 0x00E0, 0x0165, 0x011D, 0x0066, 0x0004, + 0x0033, 0x00DD, 0x0164, 0x0120, 0x0068, 0x0004, + 0x0031, 0x00DA, 0x0163, 0x0122, 0x006B, 0x0005, + 0x002F, 0x00D7, 0x0163, 0x0125, 0x006D, 0x0005, + 0x002D, 0x00D3, 0x0162, 0x0128, 0x0070, 0x0006, + 0x002B, 0x00D0, 0x0161, 0x012A, 0x0073, 0x0007, + 0x002A, 0x00CD, 0x0160, 0x012D, 0x0075, 0x0007, + 0x0028, 0x00CA, 0x015F, 0x012F, 0x0078, 0x0008, + 0x0026, 0x00C7, 0x015E, 0x0131, 0x007B, 0x0009, + 0x0025, 0x00C4, 0x015D, 0x0133, 0x007E, 0x0009, + 0x0023, 0x00C1, 0x015C, 0x0136, 0x0080, 0x000A, + 0x0022, 0x00BE, 0x015A, 0x0138, 0x0083, 0x000B, + 0x0020, 0x00BB, 0x0159, 0x013A, 0x0086, 0x000C, + 0x001F, 0x00B8, 0x0158, 0x013B, 0x0089, 0x000D, + 0x001E, 0x00B5, 0x0156, 0x013E, 0x008C, 0x000D, + 0x001C, 0x00B2, 0x0155, 0x0140, 0x008F, 0x000E, + 0x001B, 0x00AF, 0x0153, 0x0143, 0x0091, 0x000F, + 0x0019, 0x00AC, 0x0152, 0x0145, 0x0094, 0x0010, + 0x0018, 0x00A9, 0x0150, 0x0147, 0x0097, 0x0011, + 0x0017, 0x00A6, 0x014F, 0x0148, 0x009A, 0x0012, + 0x0016, 0x00A3, 0x014D, 0x0149, 0x009D, 0x0014, + 0x0015, 0x00A0, 0x014B, 0x014B, 0x00A0, 0x0015, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 
6t_64p_LanczosEd_p_0.4_p_10qb_ +// 6 +// 64 +// input/output = 0.400000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_40[198] = { + 0x0028, 0x0106, 0x01A3, 0x0107, 0x0028, 0x0000, + 0x0026, 0x0102, 0x01A3, 0x010A, 0x002B, 0x0000, + 0x0024, 0x00FE, 0x01A3, 0x010F, 0x002D, 0x0FFF, + 0x0022, 0x00FA, 0x01A3, 0x0113, 0x002F, 0x0FFF, + 0x0021, 0x00F6, 0x01A3, 0x0116, 0x0031, 0x0FFF, + 0x001F, 0x00F2, 0x01A2, 0x011B, 0x0034, 0x0FFE, + 0x001D, 0x00EE, 0x01A2, 0x011F, 0x0036, 0x0FFE, + 0x001B, 0x00EA, 0x01A1, 0x0123, 0x0039, 0x0FFE, + 0x0019, 0x00E6, 0x01A1, 0x0127, 0x003B, 0x0FFE, + 0x0018, 0x00E2, 0x01A0, 0x012A, 0x003E, 0x0FFE, + 0x0016, 0x00DE, 0x01A0, 0x012E, 0x0041, 0x0FFD, + 0x0015, 0x00DA, 0x019F, 0x0132, 0x0043, 0x0FFD, + 0x0013, 0x00D6, 0x019E, 0x0136, 0x0046, 0x0FFD, + 0x0012, 0x00D2, 0x019D, 0x0139, 0x0049, 0x0FFD, + 0x0010, 0x00CE, 0x019C, 0x013D, 0x004C, 0x0FFD, + 0x000F, 0x00CA, 0x019A, 0x0141, 0x004F, 0x0FFD, + 0x000E, 0x00C6, 0x0199, 0x0144, 0x0052, 0x0FFD, + 0x000D, 0x00C2, 0x0197, 0x0148, 0x0055, 0x0FFD, + 0x000B, 0x00BE, 0x0196, 0x014C, 0x0058, 0x0FFD, + 0x000A, 0x00BA, 0x0195, 0x014F, 0x005B, 0x0FFD, + 0x0009, 0x00B6, 0x0193, 0x0153, 0x005E, 0x0FFD, + 0x0008, 0x00B2, 0x0191, 0x0157, 0x0061, 0x0FFD, + 0x0007, 0x00AE, 0x0190, 0x015A, 0x0064, 0x0FFD, + 0x0006, 0x00AA, 0x018E, 0x015D, 0x0068, 0x0FFD, + 0x0005, 0x00A6, 0x018C, 0x0161, 0x006B, 0x0FFD, + 0x0005, 0x00A2, 0x0189, 0x0164, 0x006F, 0x0FFD, + 0x0004, 0x009E, 0x0187, 0x0167, 0x0072, 0x0FFE, + 0x0003, 0x009A, 0x0185, 0x016B, 0x0075, 0x0FFE, + 0x0002, 0x0096, 0x0183, 0x016E, 0x0079, 0x0FFE, + 0x0002, 0x0093, 0x0180, 0x016F, 0x007D, 0x0FFF, + 0x0001, 0x008F, 0x017E, 0x0173, 0x0080, 0x0FFF, + 0x0001, 0x008B, 0x017B, 0x0175, 0x0084, 0x0000, + 0x0000, 0x0087, 0x0179, 0x0179, 0x0087, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.5_p_10qb_ +// 6 +// 64 +// input/output = 0.500000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_50[198] = { + 0x0000, 0x0107, 0x01F3, 0x0106, 0x0000, 0x0000, + 0x0FFE, 0x0101, 0x01F3, 0x010D, 0x0002, 0x0FFF, + 0x0FFD, 0x00FB, 0x01F3, 0x0113, 0x0003, 0x0FFF, + 0x0FFC, 0x00F6, 0x01F3, 0x0118, 0x0005, 0x0FFE, + 0x0FFA, 0x00F0, 0x01F3, 0x011E, 0x0007, 0x0FFE, + 0x0FF9, 0x00EB, 0x01F2, 0x0124, 0x0009, 0x0FFD, + 0x0FF8, 0x00E5, 0x01F2, 0x0129, 0x000B, 0x0FFD, + 0x0FF7, 0x00E0, 0x01F1, 0x012F, 0x000D, 0x0FFC, + 0x0FF6, 0x00DA, 0x01F0, 0x0135, 0x0010, 0x0FFB, + 0x0FF5, 0x00D4, 0x01EF, 0x013B, 0x0012, 0x0FFB, + 0x0FF4, 0x00CF, 0x01EE, 0x0141, 0x0014, 0x0FFA, + 0x0FF3, 0x00C9, 0x01ED, 0x0147, 0x0017, 0x0FF9, + 0x0FF2, 0x00C4, 0x01EB, 0x014C, 0x001A, 0x0FF9, + 0x0FF1, 0x00BF, 0x01EA, 0x0152, 0x001C, 0x0FF8, + 0x0FF1, 0x00B9, 0x01E8, 0x0157, 0x001F, 0x0FF8, + 0x0FF0, 0x00B4, 0x01E6, 0x015D, 0x0022, 0x0FF7, + 0x0FF0, 0x00AE, 0x01E4, 0x0163, 0x0025, 0x0FF6, + 0x0FEF, 0x00A9, 0x01E2, 0x0168, 0x0028, 0x0FF6, + 0x0FEF, 0x00A4, 0x01DF, 0x016E, 0x002B, 0x0FF5, + 0x0FEF, 0x009F, 0x01DD, 0x0172, 0x002E, 0x0FF5, + 0x0FEE, 0x009A, 0x01DA, 0x0178, 0x0032, 0x0FF4, + 0x0FEE, 0x0094, 0x01D8, 0x017E, 0x0035, 0x0FF3, + 0x0FEE, 0x008F, 0x01D5, 0x0182, 0x0039, 0x0FF3, + 0x0FEE, 0x008A, 0x01D2, 0x0188, 0x003C, 0x0FF2, + 0x0FEE, 0x0085, 0x01CF, 0x018C, 0x0040, 0x0FF2, + 0x0FEE, 0x0081, 0x01CB, 0x0191, 0x0044, 0x0FF1, + 0x0FEE, 
0x007C, 0x01C8, 0x0196, 0x0047, 0x0FF1, + 0x0FEE, 0x0077, 0x01C4, 0x019C, 0x004B, 0x0FF0, + 0x0FEE, 0x0072, 0x01C1, 0x01A0, 0x004F, 0x0FF0, + 0x0FEE, 0x006E, 0x01BD, 0x01A4, 0x0053, 0x0FF0, + 0x0FEE, 0x0069, 0x01B9, 0x01A9, 0x0058, 0x0FEF, + 0x0FEE, 0x0065, 0x01B5, 0x01AD, 0x005C, 0x0FEF, + 0x0FEF, 0x0060, 0x01B1, 0x01B1, 0x0060, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.6_p_10qb_ +// 6 +// 64 +// input/output = 0.600000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_60[198] = { + 0x0FD9, 0x00FB, 0x0258, 0x00FB, 0x0FD9, 0x0000, + 0x0FD9, 0x00F3, 0x0258, 0x0102, 0x0FDA, 0x0000, + 0x0FD8, 0x00EB, 0x0258, 0x010B, 0x0FDB, 0x0FFF, + 0x0FD8, 0x00E3, 0x0258, 0x0112, 0x0FDC, 0x0FFF, + 0x0FD8, 0x00DC, 0x0257, 0x011B, 0x0FDC, 0x0FFE, + 0x0FD7, 0x00D4, 0x0256, 0x0123, 0x0FDE, 0x0FFE, + 0x0FD7, 0x00CD, 0x0255, 0x012B, 0x0FDF, 0x0FFD, + 0x0FD7, 0x00C5, 0x0254, 0x0133, 0x0FE0, 0x0FFD, + 0x0FD7, 0x00BE, 0x0252, 0x013C, 0x0FE1, 0x0FFC, + 0x0FD7, 0x00B6, 0x0251, 0x0143, 0x0FE3, 0x0FFC, + 0x0FD8, 0x00AF, 0x024F, 0x014B, 0x0FE4, 0x0FFB, + 0x0FD8, 0x00A8, 0x024C, 0x0154, 0x0FE6, 0x0FFA, + 0x0FD8, 0x00A1, 0x024A, 0x015B, 0x0FE8, 0x0FFA, + 0x0FD9, 0x009A, 0x0247, 0x0163, 0x0FEA, 0x0FF9, + 0x0FD9, 0x0093, 0x0244, 0x016C, 0x0FEC, 0x0FF8, + 0x0FD9, 0x008C, 0x0241, 0x0174, 0x0FEF, 0x0FF7, + 0x0FDA, 0x0085, 0x023E, 0x017B, 0x0FF1, 0x0FF7, + 0x0FDB, 0x007F, 0x023A, 0x0183, 0x0FF3, 0x0FF6, + 0x0FDB, 0x0078, 0x0237, 0x018B, 0x0FF6, 0x0FF5, + 0x0FDC, 0x0072, 0x0233, 0x0192, 0x0FF9, 0x0FF4, + 0x0FDD, 0x006C, 0x022F, 0x0199, 0x0FFC, 0x0FF3, + 0x0FDD, 0x0065, 0x022A, 0x01A3, 0x0FFF, 0x0FF2, + 0x0FDE, 0x005F, 0x0226, 0x01AA, 0x0002, 0x0FF1, + 0x0FDF, 0x005A, 0x0221, 0x01B0, 0x0006, 0x0FF0, + 0x0FE0, 0x0054, 0x021C, 0x01B7, 0x0009, 0x0FF0, + 0x0FE1, 0x004E, 0x0217, 0x01BE, 0x000D, 0x0FEF, + 0x0FE2, 0x0048, 0x0212, 0x01C6, 0x0010, 0x0FEE, + 0x0FE3, 0x0043, 0x020C, 0x01CD, 0x0014, 0x0FED, + 0x0FE4, 0x003E, 0x0207, 0x01D3, 0x0018, 0x0FEC, + 0x0FE5, 0x0039, 0x0200, 0x01DA, 0x001D, 0x0FEB, + 0x0FE6, 0x0034, 0x01FA, 0x01E1, 0x0021, 0x0FEA, + 0x0FE7, 0x002F, 0x01F5, 0x01E7, 0x0025, 0x0FE9, + 0x0FE8, 0x002A, 0x01EE, 0x01EE, 0x002A, 0x0FE8, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.7_p_10qb_ +// 6 +// 64 +// input/output = 0.700000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_70[198] = { + 0x0FC0, 0x00DA, 0x02CC, 0x00DA, 0x0FC0, 0x0000, + 0x0FC1, 0x00D0, 0x02CC, 0x00E4, 0x0FBF, 0x0000, + 0x0FC2, 0x00C6, 0x02CB, 0x00EF, 0x0FBE, 0x0000, + 0x0FC3, 0x00BC, 0x02CA, 0x00F9, 0x0FBE, 0x0000, + 0x0FC4, 0x00B2, 0x02C9, 0x0104, 0x0FBD, 0x0000, + 0x0FC5, 0x00A8, 0x02C7, 0x010F, 0x0FBD, 0x0000, + 0x0FC7, 0x009F, 0x02C5, 0x0119, 0x0FBC, 0x0000, + 0x0FC8, 0x0095, 0x02C3, 0x0124, 0x0FBC, 0x0000, + 0x0FC9, 0x008C, 0x02C0, 0x012F, 0x0FBC, 0x0000, + 0x0FCB, 0x0083, 0x02BD, 0x0139, 0x0FBC, 0x0000, + 0x0FCC, 0x007A, 0x02BA, 0x0144, 0x0FBC, 0x0000, + 0x0FCE, 0x0072, 0x02B6, 0x014D, 0x0FBD, 0x0000, + 0x0FD0, 0x0069, 0x02B2, 0x0159, 0x0FBD, 0x0FFF, + 0x0FD1, 0x0061, 0x02AD, 0x0164, 0x0FBE, 0x0FFF, + 0x0FD3, 0x0059, 0x02A9, 0x016E, 0x0FBF, 0x0FFE, + 0x0FD4, 0x0051, 0x02A4, 0x017A, 0x0FBF, 0x0FFE, + 0x0FD6, 0x0049, 0x029E, 
0x0184, 0x0FC1, 0x0FFE, + 0x0FD8, 0x0042, 0x0299, 0x018E, 0x0FC2, 0x0FFD, + 0x0FD9, 0x003A, 0x0293, 0x019B, 0x0FC3, 0x0FFC, + 0x0FDB, 0x0033, 0x028D, 0x01A4, 0x0FC5, 0x0FFC, + 0x0FDC, 0x002D, 0x0286, 0x01AF, 0x0FC7, 0x0FFB, + 0x0FDE, 0x0026, 0x0280, 0x01BA, 0x0FC8, 0x0FFA, + 0x0FE0, 0x001F, 0x0279, 0x01C4, 0x0FCB, 0x0FF9, + 0x0FE1, 0x0019, 0x0272, 0x01CE, 0x0FCD, 0x0FF9, + 0x0FE3, 0x0013, 0x026A, 0x01D9, 0x0FCF, 0x0FF8, + 0x0FE4, 0x000D, 0x0263, 0x01E3, 0x0FD2, 0x0FF7, + 0x0FE6, 0x0008, 0x025B, 0x01EC, 0x0FD5, 0x0FF6, + 0x0FE7, 0x0002, 0x0253, 0x01F7, 0x0FD8, 0x0FF5, + 0x0FE9, 0x0FFD, 0x024A, 0x0202, 0x0FDB, 0x0FF3, + 0x0FEA, 0x0FF8, 0x0242, 0x020B, 0x0FDF, 0x0FF2, + 0x0FEC, 0x0FF3, 0x0239, 0x0215, 0x0FE2, 0x0FF1, + 0x0FED, 0x0FEF, 0x0230, 0x021E, 0x0FE6, 0x0FF0, + 0x0FEF, 0x0FEB, 0x0226, 0x0226, 0x0FEB, 0x0FEF, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.8_p_10qb_ +// 6 +// 64 +// input/output = 0.800000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_80[198] = { + 0x0FBF, 0x00A1, 0x0340, 0x00A1, 0x0FBF, 0x0000, + 0x0FC1, 0x0095, 0x0340, 0x00AD, 0x0FBC, 0x0001, + 0x0FC4, 0x0089, 0x033E, 0x00BA, 0x0FBA, 0x0001, + 0x0FC6, 0x007D, 0x033D, 0x00C6, 0x0FB8, 0x0002, + 0x0FC9, 0x0072, 0x033A, 0x00D3, 0x0FB6, 0x0002, + 0x0FCC, 0x0067, 0x0338, 0x00DF, 0x0FB3, 0x0003, + 0x0FCE, 0x005C, 0x0334, 0x00EE, 0x0FB1, 0x0003, + 0x0FD1, 0x0051, 0x0331, 0x00FA, 0x0FAF, 0x0004, + 0x0FD3, 0x0047, 0x032D, 0x0108, 0x0FAD, 0x0004, + 0x0FD6, 0x003D, 0x0328, 0x0116, 0x0FAB, 0x0004, + 0x0FD8, 0x0033, 0x0323, 0x0123, 0x0FAA, 0x0005, + 0x0FDB, 0x002A, 0x031D, 0x0131, 0x0FA8, 0x0005, + 0x0FDD, 0x0021, 0x0317, 0x013F, 0x0FA7, 0x0005, + 0x0FDF, 0x0018, 0x0311, 0x014D, 0x0FA5, 0x0006, + 0x0FE2, 0x0010, 0x030A, 0x015A, 0x0FA4, 0x0006, + 0x0FE4, 0x0008, 0x0302, 0x0169, 0x0FA3, 0x0006, + 0x0FE6, 0x0000, 0x02FB, 0x0177, 0x0FA2, 0x0006, + 0x0FE8, 0x0FF9, 0x02F3, 0x0185, 0x0FA1, 0x0006, + 0x0FEB, 0x0FF1, 0x02EA, 0x0193, 0x0FA1, 0x0006, + 0x0FED, 0x0FEB, 0x02E1, 0x01A1, 0x0FA0, 0x0006, + 0x0FEE, 0x0FE4, 0x02D8, 0x01B0, 0x0FA0, 0x0006, + 0x0FF0, 0x0FDE, 0x02CE, 0x01BE, 0x0FA0, 0x0006, + 0x0FF2, 0x0FD8, 0x02C5, 0x01CB, 0x0FA0, 0x0006, + 0x0FF4, 0x0FD3, 0x02BA, 0x01D8, 0x0FA1, 0x0006, + 0x0FF6, 0x0FCD, 0x02B0, 0x01E7, 0x0FA1, 0x0005, + 0x0FF7, 0x0FC8, 0x02A5, 0x01F5, 0x0FA2, 0x0005, + 0x0FF9, 0x0FC4, 0x029A, 0x0202, 0x0FA3, 0x0004, + 0x0FFA, 0x0FC0, 0x028E, 0x0210, 0x0FA4, 0x0004, + 0x0FFB, 0x0FBC, 0x0283, 0x021D, 0x0FA6, 0x0003, + 0x0FFD, 0x0FB8, 0x0276, 0x022A, 0x0FA8, 0x0003, + 0x0FFE, 0x0FB4, 0x026B, 0x0237, 0x0FAA, 0x0002, + 0x0FFF, 0x0FB1, 0x025E, 0x0245, 0x0FAC, 0x0001, + 0x0000, 0x0FAE, 0x0252, 0x0252, 0x0FAE, 0x0000, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_0.9_p_10qb_ +// 6 +// 64 +// input/output = 0.900000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_0_90[198] = { + 0x0FD8, 0x0055, 0x03A7, 0x0054, 0x0FD8, 0x0000, + 0x0FDB, 0x0047, 0x03A7, 0x0063, 0x0FD4, 0x0000, + 0x0FDF, 0x003B, 0x03A5, 0x006F, 0x0FD1, 0x0001, + 0x0FE2, 0x002E, 0x03A3, 0x007E, 0x0FCD, 0x0002, + 0x0FE5, 0x0022, 0x03A0, 0x008D, 0x0FCA, 0x0002, + 0x0FE8, 0x0017, 0x039D, 0x009B, 0x0FC6, 0x0003, + 0x0FEB, 0x000C, 0x0398, 0x00AC, 0x0FC2, 
0x0003, + 0x0FEE, 0x0001, 0x0394, 0x00BA, 0x0FBF, 0x0004, + 0x0FF1, 0x0FF7, 0x038E, 0x00CA, 0x0FBB, 0x0005, + 0x0FF4, 0x0FED, 0x0388, 0x00DA, 0x0FB8, 0x0005, + 0x0FF6, 0x0FE4, 0x0381, 0x00EB, 0x0FB4, 0x0006, + 0x0FF9, 0x0FDB, 0x037A, 0x00FA, 0x0FB1, 0x0007, + 0x0FFB, 0x0FD3, 0x0372, 0x010B, 0x0FAD, 0x0008, + 0x0FFD, 0x0FCB, 0x0369, 0x011D, 0x0FAA, 0x0008, + 0x0000, 0x0FC3, 0x0360, 0x012E, 0x0FA6, 0x0009, + 0x0002, 0x0FBC, 0x0356, 0x013F, 0x0FA3, 0x000A, + 0x0003, 0x0FB6, 0x034C, 0x0150, 0x0FA0, 0x000B, + 0x0005, 0x0FB0, 0x0341, 0x0162, 0x0F9D, 0x000B, + 0x0007, 0x0FAA, 0x0336, 0x0173, 0x0F9A, 0x000C, + 0x0008, 0x0FA5, 0x032A, 0x0185, 0x0F97, 0x000D, + 0x000A, 0x0FA0, 0x031E, 0x0197, 0x0F94, 0x000D, + 0x000B, 0x0F9B, 0x0311, 0x01A9, 0x0F92, 0x000E, + 0x000C, 0x0F97, 0x0303, 0x01BC, 0x0F8F, 0x000F, + 0x000D, 0x0F94, 0x02F6, 0x01CD, 0x0F8D, 0x000F, + 0x000E, 0x0F91, 0x02E8, 0x01DE, 0x0F8B, 0x0010, + 0x000F, 0x0F8E, 0x02D9, 0x01F1, 0x0F89, 0x0010, + 0x0010, 0x0F8B, 0x02CA, 0x0202, 0x0F88, 0x0011, + 0x0010, 0x0F89, 0x02BB, 0x0214, 0x0F87, 0x0011, + 0x0011, 0x0F87, 0x02AB, 0x0226, 0x0F86, 0x0011, + 0x0011, 0x0F86, 0x029C, 0x0236, 0x0F85, 0x0012, + 0x0011, 0x0F85, 0x028B, 0x0249, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x027B, 0x0259, 0x0F84, 0x0012, + 0x0012, 0x0F84, 0x026A, 0x026A, 0x0F84, 0x0012, +}; + +//======================================================== +// gen_scaler_coeffs_cnf_file.m +// make_test_script.m +// 02-Apr-2024 +// 6t_64p_LanczosEd_p_1_p_10qb_ +// 6 +// 64 +// input/output = 1.000000000000 +// LanczosEd +// S1.10 +//======================================================== +static const uint16_t easf_filter_6tap_64p_ratio_1_00[198] = { + 0x0000, 0x0000, 0x0400, 0x0000, 0x0000, 0x0000, + 0x0003, 0x0FF3, 0x0400, 0x000D, 0x0FFD, 0x0000, + 0x0006, 0x0FE7, 0x03FE, 0x001C, 0x0FF9, 0x0000, + 0x0009, 0x0FDB, 0x03FC, 0x002B, 0x0FF5, 0x0000, + 0x000C, 0x0FD0, 0x03F9, 0x003A, 0x0FF1, 0x0000, + 0x000E, 0x0FC5, 0x03F5, 0x004A, 0x0FED, 0x0001, + 0x0011, 0x0FBB, 0x03F0, 0x005A, 0x0FE9, 0x0001, + 0x0013, 0x0FB2, 0x03EB, 0x006A, 0x0FE5, 0x0001, + 0x0015, 0x0FA9, 0x03E4, 0x007B, 0x0FE1, 0x0002, + 0x0017, 0x0FA1, 0x03DD, 0x008D, 0x0FDC, 0x0002, + 0x0018, 0x0F99, 0x03D4, 0x00A0, 0x0FD8, 0x0003, + 0x001A, 0x0F92, 0x03CB, 0x00B2, 0x0FD3, 0x0004, + 0x001B, 0x0F8C, 0x03C1, 0x00C6, 0x0FCE, 0x0004, + 0x001C, 0x0F86, 0x03B7, 0x00D9, 0x0FC9, 0x0005, + 0x001D, 0x0F80, 0x03AB, 0x00EE, 0x0FC4, 0x0006, + 0x001E, 0x0F7C, 0x039F, 0x0101, 0x0FBF, 0x0007, + 0x001F, 0x0F78, 0x0392, 0x0115, 0x0FBA, 0x0008, + 0x001F, 0x0F74, 0x0385, 0x012B, 0x0FB5, 0x0008, + 0x0020, 0x0F71, 0x0376, 0x0140, 0x0FB0, 0x0009, + 0x0020, 0x0F6E, 0x0367, 0x0155, 0x0FAB, 0x000B, + 0x0020, 0x0F6C, 0x0357, 0x016B, 0x0FA6, 0x000C, + 0x0020, 0x0F6A, 0x0347, 0x0180, 0x0FA2, 0x000D, + 0x0020, 0x0F69, 0x0336, 0x0196, 0x0F9D, 0x000E, + 0x0020, 0x0F69, 0x0325, 0x01AB, 0x0F98, 0x000F, + 0x001F, 0x0F68, 0x0313, 0x01C3, 0x0F93, 0x0010, + 0x001F, 0x0F69, 0x0300, 0x01D8, 0x0F8F, 0x0011, + 0x001E, 0x0F69, 0x02ED, 0x01EF, 0x0F8B, 0x0012, + 0x001D, 0x0F6A, 0x02D9, 0x0205, 0x0F87, 0x0014, + 0x001D, 0x0F6C, 0x02C5, 0x021A, 0x0F83, 0x0015, + 0x001C, 0x0F6E, 0x02B1, 0x0230, 0x0F7F, 0x0016, + 0x001B, 0x0F70, 0x029C, 0x0247, 0x0F7B, 0x0017, + 0x001A, 0x0F72, 0x0287, 0x025D, 0x0F78, 0x0018, + 0x0019, 0x0F75, 0x0272, 0x0272, 0x0F75, 0x0019, +}; + +/* Converted scaler coeff tables from S1.10 to S1.12 */ +static uint16_t easf_filter_3tap_64p_ratio_0_30_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_40_s1_12[99]; +static uint16_t 
easf_filter_3tap_64p_ratio_0_50_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_60_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_70_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_80_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_0_90_s1_12[99]; +static uint16_t easf_filter_3tap_64p_ratio_1_00_s1_12[99]; +static uint16_t easf_filter_4tap_64p_ratio_0_30_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_40_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_50_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_60_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_70_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_80_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_0_90_s1_12[132]; +static uint16_t easf_filter_4tap_64p_ratio_1_00_s1_12[132]; +static uint16_t easf_filter_6tap_64p_ratio_0_30_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_40_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_50_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_60_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_70_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_80_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_0_90_s1_12[198]; +static uint16_t easf_filter_6tap_64p_ratio_1_00_s1_12[198]; + +struct scale_ratio_to_reg_value_lookup easf_v_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_h_bf3_mode_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 0x0000}, + {-1, -1, 0x0002}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_6tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_6tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x251F}, + {5, 10, 0x291F}, + {6, 10, 0xA51F}, + {7, 10, 0xA51F}, + {8, 10, 0xAA66}, + {9, 10, 0xA51F}, + {1, 1, 0xA640}, + {-1, -1, 0xA640}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_6tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x9600}, + {5, 10, 0xA460}, + {6, 10, 0xA8E0}, + {7, 10, 0xAC00}, + {8, 10, 0xAD20}, + {9, 10, 0xAFC0}, + {1, 1, 0xB058}, + {-1, -1, 0xB058}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain6_4tap_lookup[] = { + {3, 10, 0x4100}, + {4, 10, 0x4100}, + {5, 10, 0x4100}, + {6, 10, 0x4100}, + {7, 10, 0x4100}, + {8, 10, 0x4100}, + {9, 10, 0x4100}, + {1, 1, 0x4100}, + {-1, -1, 0x4100}, +}; + +struct scale_ratio_to_reg_value_lookup easf_reducer_gain4_4tap_lookup[] = { + {3, 10, 0x4000}, + {4, 10, 0x4000}, + {5, 10, 0x4000}, + {6, 10, 0x4000}, + {7, 10, 0x4000}, + {8, 10, 0x4000}, + {9, 10, 0x4000}, + {1, 1, 0x4000}, + {-1, -1, 0x4000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring6_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x0000}, + {1, 1, 
0x0000}, + {-1, -1, 0x0000}, +}; + +struct scale_ratio_to_reg_value_lookup easf_gain_ring4_4tap_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x9900}, + {7, 10, 0xA100}, + {8, 10, 0xA8C0}, + {9, 10, 0xAB20}, + {1, 1, 0xAC00}, + {-1, -1, 0xAC00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_uptilt_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4100}, + {9, 10, 0x9F00}, + {1, 1, 0xA4C0}, + {-1, -1, 0xA8D8}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt_maxval_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x4000}, + {9, 10, 0x24FE}, + {1, 1, 0x2D64}, + {-1, -1, 0x3ADB}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_dntilt_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x3886}, + {9, 10, 0x3940}, + {1, 1, 0x3A4E}, + {-1, -1, 0x3B66}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt1_slope_lookup[] = { + {3, 10, 0x3800}, + {4, 10, 0x3800}, + {5, 10, 0x3800}, + {6, 10, 0x3800}, + {7, 10, 0x3800}, + {8, 10, 0x36F4}, + {9, 10, 0x359C}, + {1, 1, 0x3360}, + {-1, -1, 0x2F20}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_slope_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x359C}, + {1, 1, 0x31F0}, + {-1, -1, 0x1F00}, +}; + +struct scale_ratio_to_reg_value_lookup easf_3tap_uptilt2_offset_lookup[] = { + {3, 10, 0x0000}, + {4, 10, 0x0000}, + {5, 10, 0x0000}, + {6, 10, 0x0000}, + {7, 10, 0x0000}, + {8, 10, 0x0000}, + {9, 10, 0x9F00}, + {1, 1, 0xA400}, + {-1, -1, 0x9E00}, +}; + +void spl_init_easf_filter_coeffs(void) +{ + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_30, + easf_filter_3tap_64p_ratio_0_30_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_40, + easf_filter_3tap_64p_ratio_0_40_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_50, + easf_filter_3tap_64p_ratio_0_50_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_60, + easf_filter_3tap_64p_ratio_0_60_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_70, + easf_filter_3tap_64p_ratio_0_70_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_80, + easf_filter_3tap_64p_ratio_0_80_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_0_90, + easf_filter_3tap_64p_ratio_0_90_s1_12, 3); + convert_filter_s1_10_to_s1_12(easf_filter_3tap_64p_ratio_1_00, + easf_filter_3tap_64p_ratio_1_00_s1_12, 3); + + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_30, + easf_filter_4tap_64p_ratio_0_30_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_40, + easf_filter_4tap_64p_ratio_0_40_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_50, + easf_filter_4tap_64p_ratio_0_50_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_60, + easf_filter_4tap_64p_ratio_0_60_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_70, + easf_filter_4tap_64p_ratio_0_70_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_80, + easf_filter_4tap_64p_ratio_0_80_s1_12, 4); + convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_0_90, + easf_filter_4tap_64p_ratio_0_90_s1_12, 4); + 
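+ /*
+  * Editor's note: convert_filter_s1_10_to_s1_12() is defined elsewhere in
+  * the SPL sources and is not part of this hunk, so the following is only
+  * a sketch of the conversion it is assumed to perform. Each table holds
+  * 33 phases of <taps> coefficients in S1.10 format (sign, 1 integer bit,
+  * 10 fraction bits, stored in the low 12 bits of a uint16_t); widening to
+  * S1.12 is a sign-extend followed by a shift of two:
+  *
+  *     for (int i = 0; i < 33 * taps; i++) {
+  *         int16_t c = (int16_t)(src[i] << 4) >> 4;  // sign-extend 12-bit value
+  *         dst[i] = (uint16_t)(c << 2) & 0x3FFF;     // two extra fraction bits
+  *     }
+  */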
convert_filter_s1_10_to_s1_12(easf_filter_4tap_64p_ratio_1_00, + easf_filter_4tap_64p_ratio_1_00_s1_12, 4); + + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_30, + easf_filter_6tap_64p_ratio_0_30_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_40, + easf_filter_6tap_64p_ratio_0_40_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_50, + easf_filter_6tap_64p_ratio_0_50_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_60, + easf_filter_6tap_64p_ratio_0_60_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_70, + easf_filter_6tap_64p_ratio_0_70_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_80, + easf_filter_6tap_64p_ratio_0_80_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_0_90, + easf_filter_6tap_64p_ratio_0_90_s1_12, 6); + convert_filter_s1_10_to_s1_12(easf_filter_6tap_64p_ratio_1_00, + easf_filter_6tap_64p_ratio_1_00_s1_12, 6); +} + +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_3tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_3tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_3tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_3tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_3tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_3tap_64p_ratio_0_80_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_3tap_64p_ratio_0_90_s1_12; + else + return easf_filter_3tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_4tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_4tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_4tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_4tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_4tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_4tap_64p_ratio_0_80_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_4tap_64p_ratio_0_90_s1_12; + else + return easf_filter_4tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio) +{ + if (ratio.value < spl_fixpt_from_fraction(3, 10).value) + return easf_filter_6tap_64p_ratio_0_30_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(4, 10).value) + return easf_filter_6tap_64p_ratio_0_40_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(5, 10).value) + return easf_filter_6tap_64p_ratio_0_50_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(6, 10).value) + return easf_filter_6tap_64p_ratio_0_60_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(7, 10).value) + return easf_filter_6tap_64p_ratio_0_70_s1_12; + else if (ratio.value < spl_fixpt_from_fraction(8, 10).value) + return easf_filter_6tap_64p_ratio_0_80_s1_12; + else if 
(ratio.value < spl_fixpt_from_fraction(9, 10).value) + return easf_filter_6tap_64p_ratio_0_90_s1_12; + else + return easf_filter_6tap_64p_ratio_1_00_s1_12; +} + +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) +{ + if (taps == 6) + return spl_get_easf_filter_6tap_64p(ratio); + else if (taps == 4) + return spl_get_easf_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_easf_filter_3tap_64p(ratio); + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h) +{ + /* + * Old coefficients calculated scaling ratio = input / output + * New coefficients are calculated based on = output / input + */ + if (enable_easf_h) { + dscl_prog_data->filter_h = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps, data->recip_ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.h_taps_c, data->recip_ratios.horz_c); + } else { + dscl_prog_data->filter_h = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps, data->ratios.horz); + + dscl_prog_data->filter_h_c = spl_dscl_get_filter_coeffs_64p( + data->taps.h_taps_c, data->ratios.horz_c); + } + if (enable_easf_v) { + dscl_prog_data->filter_v = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps, data->recip_ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_easf_filter_coeffs_64p( + data->taps.v_taps_c, data->recip_ratios.vert_c); + } else { + dscl_prog_data->filter_v = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps, data->ratios.vert); + + dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( + data->taps.v_taps_c, data->ratios.vert_c); + } +} + +static uint32_t spl_easf_get_scale_ratio_to_reg_value(struct spl_fixed31_32 ratio, + struct scale_ratio_to_reg_value_lookup *lookup_table_base_ptr, + unsigned int num_entries) +{ + unsigned int count = 0; + uint32_t value = 0; + struct scale_ratio_to_reg_value_lookup *lookup_table_index_ptr; + + lookup_table_index_ptr = (lookup_table_base_ptr + num_entries - 1); + value = lookup_table_index_ptr->reg_value; + + while (count < num_entries) { + + lookup_table_index_ptr = (lookup_table_base_ptr + count); + if (lookup_table_index_ptr->numer < 0) + break; + + if (ratio.value < spl_fixpt_from_fraction( + lookup_table_index_ptr->numer, + lookup_table_index_ptr->denom).value) { + value = lookup_table_index_ptr->reg_value; + break; + } + + count++; + } + return value; +} +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_v_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_v_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries = sizeof(easf_h_bf3_mode_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_h_bf3_mode_lookup, num_entries); + return value; +} +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_4tap_lookup, num_entries); + } else if (taps == 
6) { + num_entries = sizeof(easf_reducer_gain6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_reducer_gain4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_reducer_gain4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_reducer_gain4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring6_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring6_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring6_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 4) { + num_entries = sizeof(easf_gain_ring4_4tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_4tap_lookup, num_entries); + } else if (taps == 6) { + num_entries = sizeof(easf_gain_ring4_6tap_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_gain_ring4_6tap_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_uptilt_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_uptilt_offset_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt_maxval_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt_maxval_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_dntilt_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_dntilt_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt1_slope_lookup) / + sizeof(struct 
scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt1_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_slope_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_slope_lookup, num_entries); + } else + value = 0; + return value; +} +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio) +{ + uint32_t value; + unsigned int num_entries; + + if (taps == 3) { + num_entries = sizeof(easf_3tap_uptilt2_offset_lookup) / + sizeof(struct scale_ratio_to_reg_value_lookup); + value = spl_easf_get_scale_ratio_to_reg_value(ratio, + easf_3tap_uptilt2_offset_lookup, num_entries); + } else + value = 0; + return value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h new file mode 100644 index 0000000000000..8bb2b8108e38a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_easf_filters.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef __DC_SPL_SCL_EASF_FILTERS_H__ +#define __DC_SPL_SCL_EASF_FILTERS_H__ + +#include "dc_spl_types.h" + +struct scale_ratio_to_reg_value_lookup { + int numer; + int denom; + const uint32_t reg_value; +}; + +void spl_init_easf_filter_coeffs(void); +uint16_t *spl_get_easf_filter_3tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_4tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_get_easf_filter_6tap_64p(struct spl_fixed31_32 ratio); +uint16_t *spl_dscl_get_easf_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); +void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, + const struct spl_scaler_data *data, bool enable_easf_v, + bool enable_easf_h); + +uint32_t spl_get_v_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_h_bf3_mode(struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_reducer_gain4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing6(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_gainRing4(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_uptilt_offset(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt_maxval(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_dntilt_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt1_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_slope(int taps, struct spl_fixed31_32 ratio); +uint32_t spl_get_3tap_uptilt2_offset(int taps, struct spl_fixed31_32 ratio); + +#endif /* __DC_SPL_SCL_EASF_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index e2baaf5841396..b02c7b0b262b8 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
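+// Editor's note: a worked example of the scale-ratio lookup helper added in
+// dc_spl_scl_easf_filters.c above. spl_easf_get_scale_ratio_to_reg_value()
+// seeds its result with the last table row, then scans forward; a row with a
+// negative numerator acts as both terminator and default. Against
+// easf_v_bf3_mode_lookup:
+//
+//     ratio = 0.45  ->  0.45 >= 3/10 and >= 4/10, but < 5/10  ->  0x0000
+//     ratio = 1.20  ->  no threshold row matches (1.20 >= 1/1), so the
+//                       {-1, -1} sentinel stops the scan       ->  0x0002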
+#include "spl_debug.h" #include "dc_spl_scl_filters.h" //========================================= // = 2 @@ -1317,97 +1318,97 @@ static const uint16_t filter_8tap_64p_183[264] = { 0x3FD4, 0x3F84, 0x0214, 0x0694, 0x0694, 0x0214, 0x3F84, 0x3FD4 }; -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_149; else return filter_3tap_16p_183; } -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_149; else return filter_3tap_64p_183; } -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_149; else return filter_4tap_16p_183; } -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_149; else return filter_4tap_64p_183; } -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_149; else return filter_5tap_64p_183; } -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 
3).value) return filter_6tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_149; else return filter_6tap_64p_183; } -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_149; else return filter_7tap_64p_183; } -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio) +const uint16_t *spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio) { - if (ratio.value < dc_fixpt_one.value) + if (ratio.value < spl_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_116; - else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) + else if (ratio.value < spl_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_149; else return filter_8tap_64p_183; @@ -1422,3 +1423,29 @@ const uint16_t *spl_get_filter_2tap_64p(void) { return filter_2tap_64p; } + +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio) +{ + if (taps == 8) + return spl_get_filter_8tap_64p(ratio); + else if (taps == 7) + return spl_get_filter_7tap_64p(ratio); + else if (taps == 6) + return spl_get_filter_6tap_64p(ratio); + else if (taps == 5) + return spl_get_filter_5tap_64p(ratio); + else if (taps == 4) + return spl_get_filter_4tap_64p(ratio); + else if (taps == 3) + return spl_get_filter_3tap_64p(ratio); + else if (taps == 2) + return spl_get_filter_2tap_64p(); + else if (taps == 1) + return NULL; + else { + /* should never happen, bug */ + SPL_BREAK_TO_DEBUGGER(); + return NULL; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h index 6d96aca53b24d..48202bc4f81e8 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.h @@ -7,53 +7,16 @@ #include "dc_spl_types.h" -const uint16_t *spl_get_filter_3tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_3tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_5tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_6tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_7tap_64p(struct fixed31_32 ratio); -const uint16_t *spl_get_filter_8tap_64p(struct fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_3tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_16p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_4tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_5tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_6tap_64p(struct spl_fixed31_32 ratio); +const uint16_t *spl_get_filter_7tap_64p(struct spl_fixed31_32 ratio); +const uint16_t 
*spl_get_filter_8tap_64p(struct spl_fixed31_32 ratio); const uint16_t *spl_get_filter_2tap_16p(void); const uint16_t *spl_get_filter_2tap_64p(void); -const uint16_t *spl_get_filter_3tap_16p_upscale(void); -const uint16_t *spl_get_filter_3tap_16p_116(void); -const uint16_t *spl_get_filter_3tap_16p_149(void); -const uint16_t *spl_get_filter_3tap_16p_183(void); +const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct spl_fixed31_32 ratio); -const uint16_t *spl_get_filter_4tap_16p_upscale(void); -const uint16_t *spl_get_filter_4tap_16p_116(void); -const uint16_t *spl_get_filter_4tap_16p_149(void); -const uint16_t *spl_get_filter_4tap_16p_183(void); - -const uint16_t *spl_get_filter_3tap_64p_upscale(void); -const uint16_t *spl_get_filter_3tap_64p_116(void); -const uint16_t *spl_get_filter_3tap_64p_149(void); -const uint16_t *spl_get_filter_3tap_64p_183(void); - -const uint16_t *spl_get_filter_4tap_64p_upscale(void); -const uint16_t *spl_get_filter_4tap_64p_116(void); -const uint16_t *spl_get_filter_4tap_64p_149(void); -const uint16_t *spl_get_filter_4tap_64p_183(void); - -const uint16_t *spl_get_filter_5tap_64p_upscale(void); -const uint16_t *spl_get_filter_5tap_64p_116(void); -const uint16_t *spl_get_filter_5tap_64p_149(void); -const uint16_t *spl_get_filter_5tap_64p_183(void); - -const uint16_t *spl_get_filter_6tap_64p_upscale(void); -const uint16_t *spl_get_filter_6tap_64p_116(void); -const uint16_t *spl_get_filter_6tap_64p_149(void); -const uint16_t *spl_get_filter_6tap_64p_183(void); - -const uint16_t *spl_get_filter_7tap_64p_upscale(void); -const uint16_t *spl_get_filter_7tap_64p_116(void); -const uint16_t *spl_get_filter_7tap_64p_149(void); -const uint16_t *spl_get_filter_7tap_64p_183(void); - -const uint16_t *spl_get_filter_8tap_64p_upscale(void); -const uint16_t *spl_get_filter_8tap_64p_116(void); -const uint16_t *spl_get_filter_8tap_64p_149(void); -const uint16_t *spl_get_filter_8tap_64p_183(void); #endif /* __DC_SPL_SCL_FILTERS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 36d10b0f2eed1..8b00ccb1dfdad 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,14 +2,14 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
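+// Editor's note: spl_fixed31_32 mirrors the dc fixed31_32 layout: the signed
+// 64-bit .value holds real * 2^32, i.e. one sign bit, 31 integer bits and
+// 32 fraction bits. For the threshold comparisons used throughout this
+// series:
+//
+//     spl_fixpt_one.value                  == 0x100000000LL
+//     spl_fixpt_half.value                 == 0x080000000LL
+//     spl_fixpt_from_fraction(4, 3).value  == 0x155555555LL  (~1.333)
+//
+// so "ratio.value < spl_fixpt_from_fraction(4, 3).value" compares the raw
+// 64-bit encodings directly, which is exactly what the filter selectors do.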
-#include "os_types.h" // swap -#ifndef ASSERT -#define ASSERT(_bool) ((void *)0) -#endif -#include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ +#include "spl_debug.h" +#include "spl_os_types.h" // swap +#include "spl_fixpt31_32.h" // fixed31_32 and related functions +#include "spl_custom_float.h" // custom float and related functions + struct spl_size { uint32_t width; uint32_t height; @@ -22,16 +22,16 @@ struct spl_rect { }; struct spl_ratios { - struct fixed31_32 horz; - struct fixed31_32 vert; - struct fixed31_32 horz_c; - struct fixed31_32 vert_c; + struct spl_fixed31_32 horz; + struct spl_fixed31_32 vert; + struct spl_fixed31_32 horz_c; + struct spl_fixed31_32 vert_c; }; struct spl_inits { - struct fixed31_32 h; - struct fixed31_32 h_c; - struct fixed31_32 v; - struct fixed31_32 v_c; + struct spl_fixed31_32 h; + struct spl_fixed31_32 h_c; + struct spl_fixed31_32 v; + struct spl_fixed31_32 v_c; }; struct spl_taps { @@ -64,6 +64,8 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ SPL_PIXEL_FORMAT_INVALID, + SPL_PIXEL_FORMAT_422BPP8, + SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, @@ -135,6 +137,7 @@ struct spl_scaler_data { struct spl_rect viewport_c; struct spl_rect recout; struct spl_ratios ratios; + struct spl_ratios recip_ratios; struct spl_inits inits; }; @@ -247,6 +250,7 @@ enum isharp_en { ISHARP_DISABLE, ISHARP_ENABLE }; +#define ISHARP_LUT_TABLE_SIZE 32 // Below struct holds values that can be directly used to program // hardware registers. No conversion/clamping is required struct dscl_prog_data { @@ -397,18 +401,24 @@ struct dscl_prog_data { uint32_t isharp_nl_en; // ISHARP_NL_EN ? 
TODO:check this struct isharp_lba isharp_lba; // ISHARP_LBA struct isharp_fmt isharp_fmt; // ISHARP_FMT - const uint32_t *isharp_delta; + uint32_t isharp_delta[ISHARP_LUT_TABLE_SIZE]; struct isharp_nldelta_sclip isharp_nldelta_sclip; // ISHARP_NLDELTA_SCLIP /* blur and scale filter */ const uint16_t *filter_blur_scale_v; const uint16_t *filter_blur_scale_h; + int sharpness_level; /* Track sharpness level */ }; /* SPL input and output definitions */ -// SPL outputs struct -struct spl_out { +// SPL scratch struct +struct spl_scratch { // Pack all SPL outputs in scl_data struct spl_scaler_data scl_data; +}; + +/* SPL input and output definitions */ +// SPL outputs struct +struct spl_out { // Pack all output need to program hw registers struct dscl_prog_data *dscl_prog_data; }; @@ -450,20 +460,43 @@ struct basic_out { bool alpha_en; bool use_two_pixels_per_container; }; -enum explicit_sharpness { - SHARPNESS_LOW = 0, - SHARPNESS_MID, - SHARPNESS_HIGH -}; -struct adaptive_sharpness { +enum sharpness_setting { + SHARPNESS_HW_OFF = 0, + SHARPNESS_ZERO, + SHARPNESS_CUSTOM +}; +struct spl_sharpness_range { + int sdr_rgb_min; + int sdr_rgb_max; + int sdr_rgb_mid; + int sdr_yuv_min; + int sdr_yuv_max; + int sdr_yuv_mid; + int hdr_rgb_min; + int hdr_rgb_max; + int hdr_rgb_mid; +}; +struct adaptive_sharpness { bool enable; - enum explicit_sharpness sharpness; + int sharpness_level; + struct spl_sharpness_range sharpness_range; }; enum linear_light_scaling { // convert it in translation logic LLS_PREF_DONT_CARE = 0, LLS_PREF_YES, LLS_PREF_NO }; +enum sharpen_policy { + SHARPEN_ALWAYS = 0, + SHARPEN_YUV = 1, + SHARPEN_RGB_FULLSCREEN_YUV = 2, + SHARPEN_FULLSCREEN_ALL = 3 +}; +enum scale_to_sharpness_policy { + NO_SCALE_TO_SHARPNESS_ADJ = 0, + SCALE_TO_SHARPNESS_ADJ_YUV = 1, + SCALE_TO_SHARPNESS_ADJ_ALL = 2 +}; struct spl_funcs { void (*spl_calc_lb_num_partitions) (bool alpha_en, @@ -476,6 +509,7 @@ struct spl_funcs { struct spl_debug { int visual_confirm_base_offset; int visual_confirm_dpp_offset; + enum scale_to_sharpness_policy scale_to_sharpness_policy; }; struct spl_in { @@ -491,6 +525,12 @@ struct spl_in { bool prefer_easf; bool disable_easf; struct spl_debug debug; + bool is_fullscreen; + bool is_hdr_on; + int h_active; + int v_active; + int sdr_white_level_nits; + enum sharpen_policy sharpen_policy; }; // end of SPL inputs diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c new file mode 100644 index 0000000000000..be2f34d034c5c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
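+// Editor's note: a worked example of the conversion implemented below, using
+// a hypothetical format of 6 exponent bits and 12 mantissa bits with a sign
+// bit (the format struct is caller-supplied, so these widths are an
+// assumption for illustration only). Converting 1.5:
+//
+//     bias     = (1 << (6 - 1)) - 1 = 31
+//     1.0 <= 1.5 < 2.0, so exponenta = bias = 31
+//     mantissa = floor((1.5 - 1.0) * 2^12) = 0x800
+//     packed   = (0 << 18) | (31 << 12) | 0x800 = 0x1F800
+//
+// spl_setup_custom_float() then reassembles those fields bit by bit into the
+// register value returned through *result.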
+ +#include "spl_debug.h" +#include "spl_custom_float.h" + +static bool spl_build_custom_float(struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + bool *negative, + uint32_t *mantissa, + uint32_t *exponenta) +{ + uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1; + + const struct spl_fixed31_32 mantissa_constant_plus_max_fraction = + spl_fixpt_from_fraction((1LL << (format->mantissa_bits + 1)) - 1, + 1LL << format->mantissa_bits); + + struct spl_fixed31_32 mantiss; + + if (spl_fixpt_eq(value, spl_fixpt_zero)) { + *negative = false; + *mantissa = 0; + *exponenta = 0; + return true; + } + + if (spl_fixpt_lt(value, spl_fixpt_zero)) { + *negative = format->sign; + value = spl_fixpt_neg(value); + } else { + *negative = false; + } + + if (spl_fixpt_lt(value, spl_fixpt_one)) { + uint32_t i = 1; + + do { + value = spl_fixpt_shl(value, 1); + ++i; + } while (spl_fixpt_lt(value, spl_fixpt_one)); + + --i; + + if (exp_offset <= i) { + *mantissa = 0; + *exponenta = 0; + return true; + } + + *exponenta = exp_offset - i; + } else if (spl_fixpt_le(mantissa_constant_plus_max_fraction, value)) { + uint32_t i = 1; + + do { + value = spl_fixpt_shr(value, 1); + ++i; + } while (spl_fixpt_lt(mantissa_constant_plus_max_fraction, value)); + + *exponenta = exp_offset + i - 1; + } else { + *exponenta = exp_offset; + } + + mantiss = spl_fixpt_sub(value, spl_fixpt_one); + + if (spl_fixpt_lt(mantiss, spl_fixpt_zero) || + spl_fixpt_lt(spl_fixpt_one, mantiss)) + mantiss = spl_fixpt_zero; + else + mantiss = spl_fixpt_shl(mantiss, format->mantissa_bits); + + *mantissa = spl_fixpt_floor(mantiss); + + return true; +} + +static bool spl_setup_custom_float(const struct spl_custom_float_format *format, + bool negative, + uint32_t mantissa, + uint32_t exponenta, + uint32_t *result) +{ + uint32_t i = 0; + uint32_t j = 0; + uint32_t value = 0; + + /* verification code: + * once calculation is ok we can remove it + */ + + const uint32_t mantissa_mask = + (1 << (format->mantissa_bits + 1)) - 1; + + const uint32_t exponenta_mask = + (1 << (format->exponenta_bits + 1)) - 1; + + if (mantissa & ~mantissa_mask) { + SPL_BREAK_TO_DEBUGGER(); + mantissa = mantissa_mask; + } + + if (exponenta & ~exponenta_mask) { + SPL_BREAK_TO_DEBUGGER(); + exponenta = exponenta_mask; + } + + /* end of verification code */ + + while (i < format->mantissa_bits) { + uint32_t mask = 1 << i; + + if (mantissa & mask) + value |= mask; + + ++i; + } + + while (j < format->exponenta_bits) { + uint32_t mask = 1 << j; + + if (exponenta & mask) + value |= mask << i; + + ++j; + } + + if (negative && format->sign) + value |= 1 << (i + j); + + *result = value; + + return true; +} + +bool spl_convert_to_custom_float_format(struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + uint32_t *result) +{ + uint32_t mantissa; + uint32_t exponenta; + bool negative; + + return spl_build_custom_float(value, format, &negative, &mantissa, &exponenta) && + spl_setup_custom_float(format, + negative, + mantissa, + exponenta, + result); +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h new file mode 100644 index 0000000000000..cdc4e107b9de4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_custom_float.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. 
*/ + +#ifndef SPL_CUSTOM_FLOAT_H_ +#define SPL_CUSTOM_FLOAT_H_ + +#include "spl_os_types.h" +#include "spl_fixpt31_32.h" + +struct spl_custom_float_format { + uint32_t mantissa_bits; + uint32_t exponenta_bits; + bool sign; +}; + +struct spl_custom_float_value { + uint32_t mantissa; + uint32_t exponenta; + uint32_t value; + bool negative; +}; + +bool spl_convert_to_custom_float_format( + struct spl_fixed31_32 value, + const struct spl_custom_float_format *format, + uint32_t *result); + +#endif //SPL_CUSTOM_FLOAT_H_ diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h new file mode 100644 index 0000000000000..a6f6132df2416 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_debug.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. */ + +#ifndef SPL_DEBUG_H +#define SPL_DEBUG_H + +#if defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB) +#define SPL_ASSERT_CRITICAL(expr) do { \ + if (WARN_ON(!(expr))) { \ + kgdb_breakpoint(); \ + } \ +} while (0) +#else +#define SPL_ASSERT_CRITICAL(expr) do { \ + if (WARN_ON(!(expr))) { \ + ; \ + } \ +} while (0) +#endif /* CONFIG_HAVE_KGDB || CONFIG_KGDB */ + +#if defined(CONFIG_DEBUG_KERNEL_DC) +#define SPL_ASSERT(expr) SPL_ASSERT_CRITICAL(expr) +#else +#define SPL_ASSERT(expr) WARN_ON(!(expr)) +#endif /* CONFIG_DEBUG_KERNEL_DC */ + +#define SPL_BREAK_TO_DEBUGGER() SPL_ASSERT(0) + +#endif // SPL_DEBUG_H diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c new file mode 100644 index 0000000000000..5fd79d9c67e20 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "spl_fixpt31_32.h" + +static const struct spl_fixed31_32 spl_fixpt_two_pi = { 26986075409LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2 = { 2977044471LL }; +static const struct spl_fixed31_32 spl_fixpt_ln2_div_2 = { 1488522236LL }; + +static inline unsigned long long abs_i64( + long long arg) +{ + if (arg > 0) + return (unsigned long long)arg; + else + return (unsigned long long)(-arg); +} + +/* + * @brief + * result = dividend / divisor + * *remainder = dividend % divisor + */ +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) +{ + unsigned long long result; + + SPL_ASSERT(divisor); + + result = spl_div64_u64_rem(dividend, divisor, remainder); + + return result; +} + + +#define FRACTIONAL_PART_MASK \ + ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) + +#define GET_INTEGER_PART(x) \ + ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + +#define GET_FRACTIONAL_PART(x) \ + (FRACTIONAL_PART_MASK & (x)) + +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = numerator < 0; + bool arg2_negative = denominator < 0; + + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? 
-denominator : denominator; + + unsigned long long remainder; + + /* determine integer part */ + + unsigned long long res_value = complete_integer_division_u64( + arg1_value, arg2_value, &remainder); + + SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX); + + /* determine fractional part */ + { + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + unsigned long long summand = (remainder << 1) >= arg2_value; + + SPL_ASSERT(res_value <= (unsigned long long)LLONG_MAX - summand); + + res_value += summand; + } + + res.value = (long long)res_value; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + bool arg1_negative = arg1.value < 0; + bool arg2_negative = arg2.value < 0; + + unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; + + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); + + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); + + unsigned long long tmp; + + res.value = arg1_int * arg2_int; + + SPL_ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg1_int * arg2_fra; + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg2_int * arg1_fra; + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg1_fra * arg2_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + if (arg1_negative ^ arg2_negative) + res.value = -res.value; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + unsigned long long arg_value = abs_i64(arg.value); + + unsigned long long arg_int = GET_INTEGER_PART(arg_value); + + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); + + unsigned long long tmp; + + res.value = arg_int * arg_int; + + SPL_ASSERT(res.value <= (long long)LONG_MAX); + + res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; + + tmp = arg_int * arg_fra; + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + tmp = arg_fra * arg_fra; + + tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + + (tmp >= (unsigned long long)spl_fixpt_half.value); + + SPL_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); + + res.value += tmp; + + return res; +} + +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) +{ + /* + * @note + * Good idea to use Newton's method + */ + + SPL_ASSERT(arg.value); + + return spl_fixpt_from_fraction( + spl_fixpt_one.value, + arg.value); +} + +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 square; + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 27; + + struct spl_fixed31_32 arg_norm = arg; + + if (spl_fixpt_le( + 
spl_fixpt_two_pi, + spl_fixpt_abs(arg))) { + arg_norm = spl_fixpt_sub( + arg_norm, + spl_fixpt_mul_int( + spl_fixpt_two_pi, + (int)spl_div64_s64( + arg_norm.value, + spl_fixpt_two_pi.value))); + } + + square = spl_fixpt_sqr(arg_norm); + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n > 2); + + if (arg.value != arg_norm.value) + res = spl_fixpt_div( + spl_fixpt_mul(res, arg_norm), + arg); + + return res; +} + +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg) +{ + return spl_fixpt_mul( + arg, + spl_fixpt_sinc(arg)); +} + +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) +{ + /* TODO implement argument normalization */ + + const struct spl_fixed31_32 square = spl_fixpt_sqr(arg); + + struct spl_fixed31_32 res = spl_fixpt_one; + + int n = 26; + + do { + res = spl_fixpt_sub( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + square, + res), + n * (n - 1))); + + n -= 2; + } while (n != 0); + + return res; +} + +/* + * @brief + * result = exp(arg), + * where abs(arg) < 1 + * + * Calculated as Taylor series. + */ +static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) +{ + unsigned int n = 9; + + struct spl_fixed31_32 res = spl_fixpt_from_fraction( + n + 2, + n + 1); + /* TODO find correct res */ + + SPL_ASSERT(spl_fixpt_lt(arg, spl_fixpt_one)); + + do + res = spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_div_int( + spl_fixpt_mul( + arg, + res), + n)); + while (--n != 1); + + return spl_fixpt_add( + spl_fixpt_one, + spl_fixpt_mul( + arg, + res)); +} + +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) +{ + /* + * @brief + * Main equation is: + * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), + * where m = round(x / ln(2)), r = x - m * ln(2) + */ + + if (spl_fixpt_le( + spl_fixpt_ln2_div_2, + spl_fixpt_abs(arg))) { + int m = spl_fixpt_round( + spl_fixpt_div( + arg, + spl_fixpt_ln2)); + + struct spl_fixed31_32 r = spl_fixpt_sub( + arg, + spl_fixpt_mul_int( + spl_fixpt_ln2, + m)); + + SPL_ASSERT(m != 0); + + SPL_ASSERT(spl_fixpt_lt( + spl_fixpt_abs(r), + spl_fixpt_one)); + + if (m > 0) + return spl_fixpt_shl( + fixed31_32_exp_from_taylor_series(r), + (unsigned char)m); + else + return spl_fixpt_div_int( + fixed31_32_exp_from_taylor_series(r), + 1LL << -m); + } else if (arg.value != 0) + return fixed31_32_exp_from_taylor_series(arg); + else + return spl_fixpt_one; +} + +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res = spl_fixpt_neg(spl_fixpt_one); + /* TODO improve 1st estimation */ + + struct spl_fixed31_32 error; + + SPL_ASSERT(arg.value > 0); + /* TODO if arg is negative, return NaN */ + /* TODO if arg is zero, return -INF */ + + do { + struct spl_fixed31_32 res1 = spl_fixpt_add( + spl_fixpt_sub( + res, + spl_fixpt_one), + spl_fixpt_div( + arg, + spl_fixpt_exp(res))); + + error = spl_fixpt_sub( + res, + res1); + + res = res1; + /* TODO determine max_allowed_error based on quality of exp() */ + } while (abs_i64(error.value) > 100ULL); + + return res; +} + + +/* this function is a generic helper to translate fixed point value to + * specified integer format that will consist of integer_bits integer part and + * fractional_bits fractional part. For example it is used in + * spl_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional + * part in 32 bits. 
It is used in hw programming (scaler) + */ + +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + /* 1. create mask of integer part */ + unsigned int result = (1 << integer_bits) - 1; + /* 2. mask out fractional part */ + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; + /* 3. shrink fixed point integer part to be of integer_bits width*/ + result &= GET_INTEGER_PART(value); + /* 4. make space for fractional part to be filled in after integer */ + result <<= fractional_bits; + /* 5. shrink fixed point fractional part to of fractional_bits width*/ + fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; + /* 6. merge the result */ + return result | fractional_part; +} + +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) +{ + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); + + if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) + return (1 << (integer_bits + fractional_bits)) - 1; + else if (truncated_val > min_clamp) + return truncated_val; + else + return min_clamp; +} + +unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 4, 19); +} + +unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 3, 19); +} + +unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 2, 19); +} + +unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) +{ + return ux_dy(arg.value, 0, 19); +} + +unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 14, 1); +} + +unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 10, 1); +} + +int spl_fixpt_s4d19(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); + else + return ux_dy(arg.value, 4, 19); +} + +struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_zero; + struct spl_fixed31_32 fixpt_int_value = spl_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct spl_fixed31_32 fixpt_value = spl_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h new file mode 100644 index 0000000000000..ed2647f9a0999 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.h @@ -0,0 +1,522 @@ +/* SPDX-License-Identifier: MIT */ + +/* Copyright 2024 Advanced Micro Devices, Inc. 
*/ + +#ifndef __SPL_FIXED31_32_H__ +#define __SPL_FIXED31_32_H__ + +#include "spl_debug.h" +#include "spl_os_types.h" // swap + +#ifndef LLONG_MAX +#define LLONG_MAX 9223372036854775807ll +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1ll) +#endif + +#define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 +#ifndef LLONG_MIN +#define LLONG_MIN (1LL<<63) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX (-1LL>>1) +#endif + +/* + * @brief + * Arithmetic operations on real numbers + * represented as fixed-point numbers. + * There are: 1 bit for sign, + * 31 bit for integer part, + * 32 bits for fractional part. + * + * @note + * Currently, overflows and underflows are asserted; + * no special result returned. + */ + +struct spl_fixed31_32 { + long long value; +}; + + +/* + * @brief + * Useful constants + */ + +static const struct spl_fixed31_32 spl_fixpt_zero = { 0 }; +static const struct spl_fixed31_32 spl_fixpt_epsilon = { 1LL }; +static const struct spl_fixed31_32 spl_fixpt_half = { 0x80000000LL }; +static const struct spl_fixed31_32 spl_fixpt_one = { 0x100000000LL }; + +/* + * @brief + * Initialization routines + */ + +/* + * @brief + * result = numerator / denominator + */ +struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long denominator); + +/* + * @brief + * result = arg + */ +static inline struct spl_fixed31_32 spl_fixpt_from_int(int arg) +{ + struct spl_fixed31_32 res; + + res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; + + return res; +} + +/* + * @brief + * Unary operators + */ + +/* + * @brief + * result = -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_neg(struct spl_fixed31_32 arg) +{ + struct spl_fixed31_32 res; + + res.value = -arg.value; + + return res; +} + +/* + * @brief + * result = abs(arg) := (arg >= 0) ? arg : -arg + */ +static inline struct spl_fixed31_32 spl_fixpt_abs(struct spl_fixed31_32 arg) +{ + if (arg.value < 0) + return spl_fixpt_neg(arg); + else + return arg; +} + +/* + * @brief + * Binary relational operators + */ + +/* + * @brief + * result = arg1 < arg2 + */ +static inline bool spl_fixpt_lt(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value < arg2.value; +} + +/* + * @brief + * result = arg1 <= arg2 + */ +static inline bool spl_fixpt_le(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value <= arg2.value; +} + +/* + * @brief + * result = arg1 == arg2 + */ +static inline bool spl_fixpt_eq(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return arg1.value == arg2.value; +} + +/* + * @brief + * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_min(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg1; + else + return arg2; +} + +/* + * @brief + * result = max(arg1, arg2) := (arg1 <= arg2) ? 
arg2 : arg1 + */ +static inline struct spl_fixed31_32 spl_fixpt_max(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value <= arg2.value) + return arg2; + else + return arg1; +} + +/* + * @brief + * | min_value, when arg <= min_value + * result = | arg, when min_value < arg < max_value + * | max_value, when arg >= max_value + */ +static inline struct spl_fixed31_32 spl_fixpt_clamp( + struct spl_fixed31_32 arg, + struct spl_fixed31_32 min_value, + struct spl_fixed31_32 max_value) +{ + if (spl_fixpt_le(arg, min_value)) + return min_value; + else if (spl_fixpt_le(max_value, arg)) + return max_value; + else + return arg; +} + +/* + * @brief + * Binary shift operators + */ + +/* + * @brief + * result = arg << shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shl(struct spl_fixed31_32 arg, unsigned char shift) +{ + SPL_ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || + ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); + + arg.value = arg.value << shift; + + return arg; +} + +/* + * @brief + * result = arg >> shift + */ +static inline struct spl_fixed31_32 spl_fixpt_shr(struct spl_fixed31_32 arg, unsigned char shift) +{ + bool negative = arg.value < 0; + + if (negative) + arg.value = -arg.value; + arg.value = arg.value >> shift; + if (negative) + arg.value = -arg.value; + return arg; +} + +/* + * @brief + * Binary additive operators + */ + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + SPL_ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || + ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); + + res.value = arg1.value + arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 + arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_add_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_add(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + struct spl_fixed31_32 res; + + SPL_ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || + ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); + + res.value = arg1.value - arg2.value; + + return res; +} + +/* + * @brief + * result = arg1 - arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_sub_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_sub(arg1, spl_fixpt_from_int(arg2)); +} + + +/* + * @brief + * Binary multiplicative operators + */ + +/* + * @brief + * result = arg1 * arg2 + */ +struct spl_fixed31_32 spl_fixpt_mul(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2); + + +/* + * @brief + * result = arg1 * arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_mul_int(struct spl_fixed31_32 arg1, int arg2) +{ + return spl_fixpt_mul(arg1, spl_fixpt_from_int(arg2)); +} + +/* + * @brief + * result = square(arg) := arg * arg + */ +struct spl_fixed31_32 spl_fixpt_sqr(struct spl_fixed31_32 arg); + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div_int(struct spl_fixed31_32 arg1, long long arg2) +{ + return spl_fixpt_from_fraction(arg1.value, spl_fixpt_from_int((int)arg2).value); +} + +/* + * @brief + * result = arg1 / arg2 + */ +static inline struct spl_fixed31_32 spl_fixpt_div(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + return 
spl_fixpt_from_fraction(arg1.value, arg2.value); +} + +/* + * @brief + * Reciprocal function + */ + +/* + * @brief + * result = reciprocal(arg) := 1 / arg + * + * @note + * No special actions taken in case argument is zero. + */ +struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg); + +/* + * @brief + * Trigonometric functions + */ + +/* + * @brief + * result = sinc(arg) := sin(arg) / arg + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sinc(struct spl_fixed31_32 arg); + +/* + * @brief + * result = sin(arg) + * + * @note + * Argument specified in radians, + * internally it's normalized to [-2pi...2pi] range. + */ +struct spl_fixed31_32 spl_fixpt_sin(struct spl_fixed31_32 arg); + +/* + * @brief + * result = cos(arg) + * + * @note + * Argument specified in radians + * and should be in [-2pi...2pi] range - + * passing arguments outside that range + * will cause incorrect result! + */ +struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg); + +/* + * @brief + * Transcendent functions + */ + +/* + * @brief + * result = exp(arg) + * + * @note + * Currently, function is verified for abs(arg) <= 1. + */ +struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg); + +/* + * @brief + * result = log(arg) + * + * @note + * Currently, abs(arg) should be less than 1. + * No normalization is done. + * Currently, no special actions taken + * in case of invalid argument(s). Take care! + */ +struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg); + +/* + * @brief + * Power function + */ + +/* + * @brief + * result = pow(arg1, arg2) + * + * @note + * Currently, abs(arg1) should be less than 1. Take care! + */ +static inline struct spl_fixed31_32 spl_fixpt_pow(struct spl_fixed31_32 arg1, struct spl_fixed31_32 arg2) +{ + if (arg1.value == 0) + return arg2.value == 0 ? spl_fixpt_one : spl_fixpt_zero; + + return spl_fixpt_exp( + spl_fixpt_mul( + spl_fixpt_log(arg1), + arg2)); +} + +/* + * @brief + * Rounding functions + */ + +/* + * @brief + * result = floor(arg) := greatest integer lower than or equal to arg + */ +static inline int spl_fixpt_floor(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = round(arg) := integer nearest to arg + */ +static inline int spl_fixpt_round(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = spl_fixpt_half.value; + + SPL_ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} + +/* + * @brief + * result = ceil(arg) := lowest integer greater than or equal to arg + */ +static inline int spl_fixpt_ceil(struct spl_fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? 
arg.value : -arg.value;
+
+	const long long summand = spl_fixpt_one.value -
+		spl_fixpt_epsilon.value;
+
+	SPL_ASSERT(LLONG_MAX - (long long)arg_value >= summand);
+
+	arg_value += summand;
+
+	if (arg.value >= 0)
+		return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+	else
+		return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
+
+/* the following functions are used in scaler hw programming to convert
+ * a fixed point value to an unsigned integer format with, for example,
+ * 2 bits for the integer part and 19 bits for the fractional part (u2d19).
+ * The same applies for u0d19: 0 bits for the integer part and 19 bits for
+ * the fractional part.
+ */
+
+unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg);
+
+unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg);
+
+int spl_fixpt_s4d19(struct spl_fixed31_32 arg);
+
+static inline struct spl_fixed31_32 spl_fixpt_truncate(struct spl_fixed31_32 arg, unsigned int frac_bits)
+{
+	bool negative = arg.value < 0;
+
+	if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) {
+		SPL_ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART);
+		return arg;
+	}
+
+	if (negative)
+		arg.value = -arg.value;
+	arg.value &= (~0ULL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits);
+	if (negative)
+		arg.value = -arg.value;
+	return arg;
+}
+
+struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits);
+struct spl_fixed31_32 spl_fixpt_from_int_dy(unsigned int int_value,
+	unsigned int frac_value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits);
+
+#endif
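Before moving on to the OS abstraction header, it may help to see the s31.32 to uXdY round trip end to end. The snippet below is a stand-alone editorial illustration, not part of the patch; it mirrors what ux_dy() and spl_fixpt_from_int_dy() do for the u2d19 case with an illustrative input of 1.5:

    #include <stdio.h>

    /* Editorial mirror of the s31.32 <-> u2d19 round trip, illustrative only. */
    int main(void)
    {
        long long fix = 0x180000000LL;                        /* 1.5 in s31.32 */
        /* pack, as ux_dy(fix, 2, 19) does */
        unsigned int packed = (unsigned int)((((fix >> 32) & 0x3) << 19) |
                              ((fix & 0xffffffffLL) >> (32 - 19)));
        /* unpack, as spl_fixpt_from_int_dy(1, 0x40000, 2, 19) does */
        long long rebuilt = ((long long)(packed >> 19) << 32) |
                            ((long long)(packed & 0x7ffff) << (32 - 19));
        printf("packed=%#x rebuilt=%#llx\n", packed, rebuilt); /* 0xc0000, 0x180000000 */
        return 0;
    }

Note that spl_fixpt_from_int_dy() shifting frac_value up by (32 - fractional_bits) is exactly the inverse of step 5 in ux_dy().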
diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h
index 7ebea91c84f6a..2e6ba71960acf 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h
@@ -1,32 +1,13 @@
-/*
- * Copyright 2012-16 Advanced Micro Devices, Inc.
- * Copyright 2019 Raptor Engineering, LLC
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
+/* SPDX-License-Identifier: MIT */
+
+/* Copyright 2024 Advanced Micro Devices, Inc. */
+/* Copyright 2019 Raptor Engineering, LLC */

 #ifndef _SPL_OS_TYPES_H_
 #define _SPL_OS_TYPES_H_

+#include "spl_debug.h"
+
 #include
 #include
 #include
@@ -39,8 +20,6 @@
  * general debug capabilities
  *
  */
-// TODO: need backport
-#define SPL_BREAK_TO_DEBUGGER() ASSERT(0)

 static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder)
 {
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index cd70453aeae05..ff27229cc3a47 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -300,6 +300,7 @@ struct dmub_srv_hw_params {
 	enum dmub_ips_disable_type disable_ips;
 	bool disallow_phy_access;
 	bool disable_sldo_opt;
+	bool enable_non_transparent_setconfig;
 };

 /**
@@ -569,6 +570,14 @@ struct dmub_notification {
 	};
 };

+/* enum dmub_ips_mode - IPS mode identifier */
+enum dmub_ips_mode {
+	DMUB_IPS_MODE_IPS1_MAX = 0,
+	DMUB_IPS_MODE_IPS2,
+	DMUB_IPS_MODE_IPS1_RCG,
+	DMUB_IPS_MODE_IPS1_ONO2_ON
+};
+
 /**
  * DMUB firmware version helper macro - useful for checking the firmware
  * version to know if a feature or functionality is supported or present.
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index c5f99cbff0b64..6edd3d34c7b59 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -111,7 +111,7 @@
 #define DMUB_MAX_PHANTOM_PLANES ((DMUB_MAX_PLANES) / 2)

 /* Trace buffer offset for entry */
-#define TRACE_BUFFER_ENTRY_OFFSET 16
+#define TRACE_BUFFER_ENTRY_OFFSET 16

 /**
  * Maximum number of dirty rects supported by FW.
@@ -170,6 +170,11 @@
 #pragma pack(push, 1)

 #define ABM_NUM_OF_ACE_SEGMENTS 5

+/**
+ * Debug FW state offset
+ */
+#define DMUB_DEBUG_FW_STATE_OFFSET 0x300
+
 union abm_flags {
 	struct {
 	/**
@@ -682,7 +687,7 @@ union dmub_fw_boot_options {
 		uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
 		uint32_t usb4_cm_version: 1; /**< 1 CM support */
 		uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
-		uint32_t reserved0: 1;
+		uint32_t enable_non_transparent_setconfig: 1; /* 1 if DPIA uses the conventional DP link training flow */
 		uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
 		uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
 		uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
@@ -747,7 +752,8 @@ union dmub_shared_state_ips_driver_signals {
 		uint32_t allow_ips1 : 1; /**< 1 if IPS1 is allowed */
 		uint32_t allow_ips2 : 1; /**< 1 if IPS2 is allowed */
 		uint32_t allow_z10 : 1; /**< 1 if Z10 is allowed */
-		uint32_t reserved_bits : 28; /**< Reserved bits */
+		uint32_t allow_idle : 1; /**< 1 if driver is allowing idle */
+		uint32_t reserved_bits : 27; /**< Reserved bits */
 	} bits;
 	uint32_t all;
 };
@@ -1050,12 +1056,107 @@ enum dmub_gpint_command {
 	 */
 	DMUB_GPINT__GET_TRACE_BUFFER_MASK_WORD3 = 119,

+	/**
+	 * DESC: Set IPS residency measurement
+	 * ARGS: 0 - Disable IPS measurement
+	 *       1 - Enable IPS measurement
+	 */
+	DMUB_GPINT__IPS_RESIDENCY = 121,
+
 	/**
 	 * DESC: Enable measurements for various task duration
 	 * ARGS: 0 - Disable measurement
 	 *       1 - Enable measurement
 	 */
 	DMUB_GPINT__TRACE_DMUB_WAKE_ACTIVITY = 123,
+
+	/**
+	 * DESC: Gets IPS residency in microseconds
+	 * ARGS: 0 - Return IPS1 residency
+	 *       1 - Return IPS2 residency
+	 *       2 - Return IPS1_RCG residency
+	 *       3 - Return IPS1_ONO2_ON residency
+	 * RETURN: Total residency in microseconds - lower 32 bits
+	 */
+	DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_LO = 124,
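+
+	/*
+	 * (Editorial sketch) The LO/HI command pairs in this enum are meant to
+	 * be read back-to-back and combined into one 64-bit value. Roughly,
+	 * with dmub_gpint_send() as a hypothetical send-and-read helper that
+	 * is not part of this patch:
+	 *
+	 *	us  = dmub_gpint_send(DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_LO, mode);
+	 *	us |= (uint64_t)dmub_gpint_send(DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI, mode) << 32;
+	 */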
+
+	/**
+	 * DESC: Gets IPS1 histogram counts
+	 * ARGS: Bucket index
+	 * RETURN: Total count for the bucket
+	 */
+	DMUB_GPINT__GET_IPS1_HISTOGRAM_COUNTER = 125,
+
+	/**
+	 * DESC: Gets IPS2 histogram counts
+	 * ARGS: Bucket index
+	 * RETURN: Total count for the bucket
+	 */
+	DMUB_GPINT__GET_IPS2_HISTOGRAM_COUNTER = 126,
+
+	/**
+	 * DESC: Gets IPS residency
+	 * ARGS: 0 - Return IPS1 residency
+	 *       1 - Return IPS2 residency
+	 *       2 - Return IPS1_RCG residency
+	 *       3 - Return IPS1_ONO2_ON residency
+	 * RETURN: Total residency in milli-percent.
+	 */
+	DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT = 127,
+
+	/**
+	 * DESC: Gets IPS1_RCG histogram counts
+	 * ARGS: Bucket index
+	 * RETURN: Total count for the bucket
+	 */
+	DMUB_GPINT__GET_IPS1_RCG_HISTOGRAM_COUNTER = 128,
+
+	/**
+	 * DESC: Gets IPS1_ONO2_ON histogram counts
+	 * ARGS: Bucket index
+	 * RETURN: Total count for the bucket
+	 */
+	DMUB_GPINT__GET_IPS1_ONO2_ON_HISTOGRAM_COUNTER = 129,
+
+	/**
+	 * DESC: Gets IPS entry counter during residency measurement
+	 * ARGS: 0 - Return IPS1 entry counts
+	 *       1 - Return IPS2 entry counts
+	 *       2 - Return IPS1_RCG entry counts
+	 *       3 - Return IPS1_ONO2_ON entry counts
+	 * RETURN: Entry counter for selected IPS mode
+	 */
+	DMUB_GPINT__GET_IPS_RESIDENCY_ENTRY_COUNTER = 130,
+
+	/**
+	 * DESC: Gets IPS inactive residency in microseconds
+	 * ARGS: 0 - Return IPS1_MAX residency
+	 *       1 - Return IPS2 residency
+	 *       2 - Return IPS1_RCG residency
+	 *       3 - Return IPS1_ONO2_ON residency
+	 * RETURN: Total inactive residency in microseconds - lower 32 bits
+	 */
+	DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_LO = 131,
+
+	/**
+	 * DESC: Gets IPS inactive residency in microseconds
+	 * ARGS: 0 - Return IPS1_MAX residency
+	 *       1 - Return IPS2 residency
+	 *       2 - Return IPS1_RCG residency
+	 *       3 - Return IPS1_ONO2_ON residency
+	 * RETURN: Total inactive residency in microseconds - upper 32 bits
+	 */
+	DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_HI = 132,
+
+	/**
+	 * DESC: Gets IPS residency in microseconds
+	 * ARGS: 0 - Return IPS1 residency
+	 *       1 - Return IPS2 residency
+	 *       2 - Return IPS1_RCG residency
+	 *       3 - Return IPS1_ONO2_ON residency
+	 * RETURN: Total residency in microseconds - upper 32 bits
+	 */
+	DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI = 133,
 };

 /**
@@ -1306,8 +1407,10 @@ enum dmub_out_cmd_type {
 /* DMUB_CMD__DPIA command sub-types. */
 enum dmub_cmd_dpia_type {
 	DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0,
-	DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1,
+	DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1, // will be replaced by DPIA_SET_CONFIG_REQUEST
 	DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2,
+	DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3,
+	DMUB_CMD__DPIA_SET_CONFIG_REQUEST = 4,
 };

 /* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */
@@ -1879,7 +1982,12 @@ enum dmub_cmd_idle_opt_type {
 	/**
 	 * DCN hardware notify idle.
 	 */
-	DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2
+	DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE = 2,
+
+	/**
+	 * DCN hardware notify power state.
+	 */
+	DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE = 3,
 };

 /**
@@ -1906,6 +2014,33 @@ struct dmub_rb_cmd_idle_opt_dcn_notify_idle {
 	struct dmub_dcn_notify_idle_cntl_data cntl_data;
 };

+/**
+ * enum dmub_idle_opt_dc_power_state - DC power states.
+ */
+enum dmub_idle_opt_dc_power_state {
+	DMUB_IDLE_OPT_DC_POWER_STATE_UNKNOWN = 0,
+	DMUB_IDLE_OPT_DC_POWER_STATE_D0 = 1,
+	DMUB_IDLE_OPT_DC_POWER_STATE_D1 = 2,
+	DMUB_IDLE_OPT_DC_POWER_STATE_D2 = 4,
+	DMUB_IDLE_OPT_DC_POWER_STATE_D3 = 8,
+};
+
+/**
+ * struct dmub_idle_opt_set_dc_power_state_data - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+ */
+struct dmub_idle_opt_set_dc_power_state_data {
+	uint8_t power_state; /**< power state */
+	uint8_t pad[3]; /**< padding */
+};
+
+/**
+ * struct dmub_rb_cmd_idle_opt_set_dc_power_state - Data passed to FW in a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+ */
+struct dmub_rb_cmd_idle_opt_set_dc_power_state {
+	struct dmub_cmd_header header; /**< header */
+	struct dmub_idle_opt_set_dc_power_state_data data;
+};
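For orientation, this is roughly how a caller would populate the new command. The sketch is editorial: DMUB_CMD__IDLE_OPT, dm_execute_dmub_cmd(), DM_DMUB_WAIT_TYPE_WAIT, and struct dc_context are assumed from the surrounding driver and are not defined in this hunk.

    /* Editorial sketch, not part of the patch. */
    static void sketch_set_dc_power_state(struct dc_context *ctx)
    {
        union dmub_rb_cmd cmd;

        memset(&cmd, 0, sizeof(cmd));
        cmd.idle_opt_set_dc_power_state.header.type = DMUB_CMD__IDLE_OPT;
        cmd.idle_opt_set_dc_power_state.header.sub_type =
                DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE;
        cmd.idle_opt_set_dc_power_state.header.payload_bytes =
                sizeof(cmd.idle_opt_set_dc_power_state.data);
        cmd.idle_opt_set_dc_power_state.data.power_state =
                DMUB_IDLE_OPT_DC_POWER_STATE_D3;

        dm_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
    }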
+
 /**
  * struct dmub_clocks - Clock update notification.
  */
@@ -2064,7 +2199,7 @@ struct dmub_rb_cmd_dig1_dpia_control {
 };

 /**
- * SET_CONFIG Command Payload
+ * SET_CONFIG Command Payload (deprecated)
  */
 struct set_config_cmd_payload {
 	uint8_t msg_type; /* set config message type */
@@ -2072,7 +2207,7 @@ struct set_config_cmd_payload {
 };

 /**
- * Data passed from driver to FW in a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command.
+ * Data passed from driver to FW in a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command. (deprecated)
  */
 struct dmub_cmd_set_config_control_data {
 	struct set_config_cmd_payload cmd_pkt;
@@ -2080,6 +2215,17 @@ struct dmub_cmd_set_config_control_data {
 	uint8_t immed_status; /* Immediate status returned in case of error */
 };

+/**
+ * SET_CONFIG Request Command Payload
+ */
+struct set_config_request_cmd_payload {
+	uint8_t instance; /* DPIA instance */
+	uint8_t immed_status; /* Immediate status returned in case of error */
+	uint8_t msg_type; /* set config message type */
+	uint8_t reserved;
+	uint32_t msg_data; /* set config message data */
+};
+
 /**
  * DMUB command structure for SET_CONFIG command.
  */
@@ -2088,6 +2234,14 @@ struct dmub_rb_cmd_set_config_access {
 	struct dmub_cmd_set_config_control_data set_config_control; /* set config data */
 };

+/**
+ * DMUB command structure for SET_CONFIG request command.
+ */
+struct dmub_rb_cmd_set_config_request {
+	struct dmub_cmd_header header; /* header */
+	struct set_config_request_cmd_payload payload; /* set config request payload */
+};
+
 /**
  * Data passed from driver to FW in a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command.
  */
@@ -2106,6 +2260,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots {
 	struct dmub_cmd_mst_alloc_slots_control_data mst_slots_control; /* mst slots control */
 };

+/**
+ * Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command.
+ */
+struct dmub_cmd_tps_notification_data {
+	uint8_t instance; /* DPIA instance */
+	uint8_t tps; /* requested training pattern */
+	uint8_t reserved1;
+	uint8_t reserved2;
+};
+
+/**
+ * DMUB command structure for SET_TPS_NOTIFICATION command.
+ */
+struct dmub_rb_cmd_set_tps_notification {
+	struct dmub_cmd_header header; /* header */
+	struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */
+};
+
 /**
  * DMUB command structure for DPIA HPD int enable control.
  */
@@ -4264,10 +4436,38 @@ struct dmub_cmd_abm_set_backlight_data {
 	 */
 	uint8_t panel_mask;

+	/**
+	 * Backlight control type.
+	 * Value 0 is PWM backlight control.
+	 * Value 1 is VAUX backlight control.
+	 * Value 2 is AMD DPCD AUX backlight control.
+	 */
+	uint8_t backlight_control_type;
+
 	/**
 	 * Explicit padding to 4 byte boundary.
 	 */
-	uint8_t pad[2];
+	uint8_t pad[1];
+
+	/**
+	 * Minimum luminance in nits.
+	 */
+	uint32_t min_luminance;
+
+	/**
+	 * Maximum luminance in nits.
+	 */
+	uint32_t max_luminance;
+
+	/**
+	 * Minimum backlight in pwm.
+	 */
+	uint32_t min_backlight_pwm;
+
+	/**
+	 * Maximum backlight in pwm.
+	 */
+	uint32_t max_backlight_pwm;
 };

 /**
@@ -5267,11 +5467,19 @@ union dmub_rb_cmd {
 	/**
 	 * Definition of a DMUB_CMD__DPIA_SET_CONFIG_ACCESS command.
 	 */
-	struct dmub_rb_cmd_set_config_access set_config_access;
+	struct dmub_rb_cmd_set_config_access set_config_access; // (deprecated)
+	/**
+	 * Definition of a DMUB_CMD__DPIA_SET_CONFIG_REQUEST command.
+	 */
+	struct dmub_rb_cmd_set_config_request set_config_request;
 	/**
 	 * Definition of a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command.
 	 */
 	struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots;
+	/**
+	 * Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command.
+	 */
+	struct dmub_rb_cmd_set_tps_notification set_tps_notification;
 	/**
 	 * Definition of a DMUB_CMD__EDID_CEA command.
 	 */
@@ -5298,6 +5506,10 @@ union dmub_rb_cmd {
 	 * Definition of a DMUB_CMD__IDLE_OPT_DCN_NOTIFY_IDLE command.
 	 */
 	struct dmub_rb_cmd_idle_opt_dcn_notify_idle idle_opt_notify_idle;
+	/**
+	 * Definition of a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command.
+	 */
+	struct dmub_rb_cmd_idle_opt_set_dc_power_state idle_opt_set_dc_power_state;
 	/*
 	 * Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command.
 	 */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
index 746696b6f09a8..07336382471e6 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c
@@ -425,6 +425,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
 	boot_options.bits.ips_disable = params->disable_ips;
 	boot_options.bits.ips_sequential_ono = params->ips_sequential_ono;
 	boot_options.bits.disable_sldo_opt = params->disable_sldo_opt;
+	boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig;

 	REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
 }
@@ -462,7 +463,7 @@ uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub)

 void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
 {
-	uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
+	uint32_t is_dmub_enabled, is_soft_reset;
 	uint32_t is_traceport_enabled, is_cw6_enabled;

 	if (!dmub || !diag_data)
@@ -512,9 +513,6 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti
 	REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
 	diag_data->is_dmcub_soft_reset = is_soft_reset;

-	REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
-	diag_data->is_dmcub_secure_reset = is_sec_reset;
-
 	REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
 	diag_data->is_traceport_en = is_traceport_enabled;

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
index ca0c8a54b635e..88bf59ee5fea7 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c
@@ -63,11 +63,18 @@ static void set_reg_field_values(struct dmub_reg_value_masks *field_value_mask,
 	}
 }

+/*
+ * v6.0-2372-g92ffdc98c5f7
+ * ("drm/amd/display: Enable Freesync over PCon")
+ * check __DM_SERVICES_H__ so this declaration is not redefined when
+ * building against backport.h.
+ */ +#ifndef __DM_SERVICES_H__ static inline uint32_t get_reg_field_value_ex(uint32_t reg_value, uint32_t mask, uint8_t shift) { return (mask & reg_value) >> shift; } +#endif void dmub_reg_update(struct dmub_srv *srv, uint32_t addr, int n, uint8_t shift1, uint32_t mask1, uint32_t field_value1, ...) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h index 123d1704670ee..10a87a277be22 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h @@ -28,6 +28,14 @@ #include "../inc/dmub_cmd.h" +#ifdef REG_SET +#undef REG_SET +#endif + +#ifdef REG_GET +#undef REG_GET +#endif + struct dmub_srv; /* Register offset and field lookup. */ @@ -42,7 +50,14 @@ struct dmub_srv; #define REG(reg) (REGS)->offset.reg +/* + * v6.0-2372-g92ffdc98c5f7 + * ("drm/amd/display: Enable Freesync over PCon") + * verify __DM_SERVICES_H__ to fix the redefine function declaration for backport.h. + */ +#ifndef __DM_SERVICES_H__ #define FD(reg_field) (REGS)->shift.reg_field, (REGS)->mask.reg_field +#endif #define FN(reg_name, field) FD(reg_name##__##field) diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h index aee5170f5fb23..de8f3cfed6c84 100644 --- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h +++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h @@ -164,18 +164,19 @@ enum dpcd_psr_sink_states { PSR_SINK_STATE_SINK_INTERNAL_ERROR = 7, }; -#define DP_SOURCE_SEQUENCE 0x30c -#define DP_SOURCE_TABLE_REVISION 0x310 -#define DP_SOURCE_PAYLOAD_SIZE 0x311 -#define DP_SOURCE_SINK_CAP 0x317 -#define DP_SOURCE_BACKLIGHT_LEVEL 0x320 -#define DP_SOURCE_BACKLIGHT_CURRENT_PEAK 0x326 -#define DP_SOURCE_BACKLIGHT_CONTROL 0x32E -#define DP_SOURCE_BACKLIGHT_ENABLE 0x32F -#define DP_SOURCE_MINIMUM_HBLANK_SUPPORTED 0x340 +#define DP_SOURCE_SEQUENCE 0x30C +#define DP_SOURCE_TABLE_REVISION 0x310 +#define DP_SOURCE_PAYLOAD_SIZE 0x311 +#define DP_SOURCE_SINK_CAP 0x317 +#define DP_SOURCE_BACKLIGHT_LEVEL 0x320 +#define DP_SOURCE_BACKLIGHT_CURRENT_PEAK 0x326 +#define DP_SOURCE_BACKLIGHT_CONTROL 0x32E +#define DP_SOURCE_BACKLIGHT_ENABLE 0x32F +#define DP_SOURCE_MINIMUM_HBLANK_SUPPORTED 0x340 #define DP_SINK_PR_REPLAY_STATUS 0x378 #define DP_SINK_PR_PIXEL_DEVIATION_PER_LINE 0x379 #define DP_SINK_PR_MAX_NUMBER_OF_DEVIATION_LINE 0x37A +#define DP_SINK_EMISSION_RATE 0x37E /* Remove once drm_dp_helper.h is updated upstream */ #ifndef DP_TOTAL_LTTPR_CNT diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index a48d564d1660c..4d68c1c6e2100 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -61,11 +61,13 @@ #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) -#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) #define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__) #define DC_LOG_IPS(...) pr_debug("[IPS]: "__VA_ARGS__) +#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) +#define DC_LOG_REGISTER_READ(...) pr_debug("[REGISTER_READ]: "__VA_ARGS__) +#define DC_LOG_REGISTER_WRITE(...) 
pr_debug("[REGISTER_WRITE]: "__VA_ARGS__)

 struct dc_log_buffer_ctx {
 	char *buf;
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index a40e6590215a6..fc4f5415ce0ba 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -48,6 +48,7 @@
 #define VSYNCS_BETWEEN_FLIP_THRESHOLD 2
 #define FREESYNC_CONSEC_FLIP_AFTER_VSYNC 5
 #define FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US 500
+#define MICRO_HZ_TO_HZ(x) ((x) / 1000000)

 struct core_freesync {
 	struct mod_freesync public;
@@ -121,6 +122,17 @@ static unsigned int calc_duration_in_us_from_v_total(
 	return duration_in_us;
 }

+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+	unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
+
+	if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
+		max_hw_v_total -= stream->timing.v_front_porch + 1;
+	}
+
+	return max_hw_v_total;
+}
+
 unsigned int mod_freesync_calc_v_total_from_refresh(
 	const struct dc_stream_state *stream,
 	unsigned int refresh_in_uhz)
@@ -132,9 +144,19 @@
 	((unsigned int)(div64_u64((1000000000ULL * 1000000),
 			refresh_in_uhz)));

-	v_total = div64_u64(div64_u64(((unsigned long long)(
-			frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
-			stream->timing.h_total), 1000000);
+	if (MICRO_HZ_TO_HZ(refresh_in_uhz) <= stream->timing.min_refresh_in_uhz) {
+		/* When the target refresh rate is the minimum panel refresh rate,
+		 * round down the vtotal value to avoid stretching vblank over
+		 * panel's vtotal boundary.
+		 */
+		v_total = div64_u64(div64_u64(((unsigned long long)(
+				frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+				stream->timing.h_total), 1000000);
+	} else {
+		v_total = div64_u64(div64_u64(((unsigned long long)(
+				frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
+				stream->timing.h_total) + 500000, 1000000);
+	}

 	/* v_total cannot be less than nominal */
 	if (v_total < stream->timing.v_total) {
@@ -1002,7 +1024,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
 	if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
 		min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
-			(stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total));
+			(stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
 	}

 	/* Limit minimum refresh rate to what can be supported by hardware */
 	min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?
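The rounding split above is easier to see with concrete numbers. The following stand-alone editorial sketch mirrors the two div64_u64() forms; the timing values are illustrative (a 148.5 MHz, h_total 2200 mode at a 60 Hz target), not taken from the patch:

    #include <stdio.h>
    #include <stdint.h>

    /* Editorial mirror of the mod_freesync_calc_v_total_from_refresh() math. */
    int main(void)
    {
        uint64_t frame_ns = 16666666;      /* ~60 Hz frame duration, in ns */
        uint64_t pix_clk_100hz = 1485000;  /* 148.5 MHz expressed in 100 Hz units */
        uint64_t h_total = 2200;
        uint64_t scaled = frame_ns * (pix_clk_100hz / 10) / h_total;

        printf("floor: %llu\n", (unsigned long long)(scaled / 1000000));            /* 1124 */
        printf("round: %llu\n", (unsigned long long)((scaled + 500000) / 1000000)); /* 1125 */
        return 0;
    }

At the panel's minimum refresh rate the floor form deliberately comes out one line short (1124) rather than one line long, so the stretched vblank can never cross the panel's vtotal boundary; everywhere else the +500000 round-to-nearest form (1125) is more accurate.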
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
index 7ecf76aea9505..6e064e6ae949f 100644
--- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
+++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c
@@ -25,7 +25,9 @@

 #include "hdcp.h"

+#ifndef MIN
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
 #define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/
 #define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */
 #define HDCP_MAX_AUX_TRANSACTION_SIZE 16
diff --git a/drivers/gpu/drm/amd/dkms/Makefile b/drivers/gpu/drm/amd/dkms/Makefile
new file mode 100644
index 0000000000000..6153df061bcd8
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/Makefile
@@ -0,0 +1,230 @@
+ifeq ($(CC), gcc)
+GCCMAJ=$(shell echo __GNUC__ | $(CC) -E -x c - | tail -n 1)
+GCCMIN=$(shell echo __GNUC_MINOR__ | $(CC) -E -x c - | tail -n 1)
+GCCPAT=$(shell echo __GNUC_PATCHLEVEL__ | $(CC) -E -x c - | tail -n 1)
+# CONFIG_GCC_VERSION returns x.xx.xx as the version format
+GCCSTR=$(shell printf "%d%02d%02d" $(GCCMAJ) $(GCCMIN) $(GCCPAT))
+
+KERNEL_MAJ=$(VERSION)
+KERNEL_PATCHLEVEL=$(PATCHLEVEL)
+KERNEL_SUBLEVEL=$(SUBLEVEL)
+KERNEL_VER=$(shell printf "%d%02d%02d" $(KERNEL_MAJ) $(KERNEL_PATCHLEVEL) $(KERNEL_SUBLEVEL))
+
+kernel-version = $(shell [ $(KERNEL_VER)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
+
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(shell [ $(CONFIG_GCC_VERSION) -ne $(GCCSTR) ] && echo y), y)
+$(warning "Local GCC version $(GCCSTR) does not match kernel compiler GCC version $(CONFIG_GCC_VERSION)")
+$(warning "This may cause unexpected and hard-to-isolate compiler-related issues")
+endif
+else
+export CONFIG_CC_IS_GCC=y
+export CONFIG_GCC_VERSION=$(GCCSTR)
+$(warning "CONFIG_CC_IS_GCC is not defined. Exporting it with version $(CONFIG_GCC_VERSION)")
+endif
+
+endif
+
+include $(src)/amd/dkms/Makefile.compiler
+
+# GCC 4.8.5 is too old for kernel >= 5.4 and will cause compile failures.
+ifneq ($(call gcc-min-version, 40805), y)
+ifeq ($(call kernel-version, -ge, 0504, y), y)
+$(error "This GCC is too old for this kernel, please upgrade GCC to 9.3 or newer")
+endif
+endif
+
+ifndef CONFIG_DRM
+$(error CONFIG_DRM disabled, exit...)
+endif
+
+ifeq (y,$(CONFIG_DRM_AMDGPU))
+$(error DRM_AMDGPU is built-in, exit...)
+endif
+
+ifndef CONFIG_KALLSYMS
+$(error CONFIG_KALLSYMS disabled, kallsyms_lookup_name() is absent, exit...)
+endif
+
+_is_kcl_macro_defined=$(shell grep $1 $(src)/amd/dkms/config/config.h | grep -q "define" && echo "y" || echo "n")
+
+ifeq ($(call _is_kcl_macro_defined,HAVE_DMA_RESV_SEQ_BUG),y)
+$(error dma_resv->seq is missing. exit...)
+endif
+
+ifeq ($(call _is_kcl_macro_defined,HAVE_RESERVATION_WW_CLASS_BUG),y)
+$(error reservation_ww_class is missing. exit...)
+endif + +DRM_VER=$(shell sed -n 's/^RHEL_DRM_VERSION = \(.*\)/\1/p' $(srctree)/Makefile) +DRM_PATCH=$(shell sed -n 's/^RHEL_DRM_PATCHLEVEL = \(.*\)/\1/p' $(srctree)/Makefile) +ifeq ($(DRM_VER),) +DRM_VER = $(VERSION) +DRM_PATCH = $(PATCHLEVEL) +endif + +subdir-ccflags-y += \ + -DDRM_VER=$(DRM_VER) \ + -DDRM_PATCH=$(DRM_PATCH) \ + -DDRM_SUB="0" + +define get_rhel_version +printf "#include \n$(1)" | $(CC) $(LINUXINCLUDE) -E -x c - | tail -n 1 | grep -v $(1) +endef +RHEL_MAJOR := $(shell $(call get_rhel_version,RHEL_MAJOR)) +RHEL_MINOR := $(shell $(call get_rhel_version,RHEL_MINOR)) + +ifneq (,$(RHEL_MAJOR)) +OS_NAME = "rhel" +OS_VERSION = "$(RHEL_MAJOR).$(RHEL_MINOR)" +else ifneq (,$(wildcard /etc/os-release)) +OS_NAME = "$(shell sed -n 's/^ID=\(.*\)/\1/p' /etc/os-release | tr -d '\"')" +# On CentOS/RHEL, users could have installed a kernel not distributed from RHEL +ifeq ("centos",$(OS_NAME)) +OS_NAME="custom-rhel" +else ifeq ("rhel",$(OS_NAME)) +OS_NAME="custom-rhel" +else ifeq ("linuxmint",$(OS_NAME)) +OS_NAME="ubuntu" +endif +OS_VERSION = $(shell sed -n 's/^VERSION_ID=\(.*\)/\1/p' /etc/os-release) +else +OS_NAME = "unknown" +OS_VERSION = "0.0" +endif + +OS_VERSION_STR = $(subst .,_,$(OS_VERSION)) + +ifeq ("ubuntu",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_UBUNTU +else ifeq ("rhel",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_RHEL +else ifeq ("steamos",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_STEAMOS +else ifeq ("sled",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_SLE +else ifeq ("sles",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_SLE +else ifeq ("amzn",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_AMZ +else ifeq ("debian",$(OS_NAME)) +subdir-ccflags-y += -DOS_NAME_DEBIAN +else +subdir-ccflags-y += -DOS_NAME_UNKNOWN +endif + +subdir-ccflags-y += \ + -DOS_VERSION_MAJOR=$(shell echo $(OS_VERSION).0 | cut -d. -f1) \ + -DOS_VERSION_MINOR=$(shell echo $(OS_VERSION).0 | cut -d. 
-f2) + +ifeq ($(OS_NAME),"opensuse-leap") +subdir-ccflags-y += -DOS_NAME_SUSE_$(OS_VERSION_STR) +endif + +ifeq ($(OS_NAME),"sled") +subdir-ccflags-y += -DOS_NAME_SUSE_$(OS_VERSION_STR) +endif + +ifeq ($(OS_NAME),"sles") +subdir-ccflags-y += -DOS_NAME_SUSE_$(OS_VERSION_STR) +endif + +ifeq ($(OS_NAME),"ubuntu") +OS_BUILD_NUM = $(shell echo $(KERNELRELEASE) | cut -d '-' -f 2) +subdir-ccflags-y += -DUBUNTU_BUILD_NUM=$(OS_BUILD_NUM) +OS_OEM = "$(shell echo $(KERNELRELEASE) | cut -d '-' -f 3)" +ifeq ($(OS_OEM),"oem") +subdir-ccflags-y += -DOS_NAME_UBUNTU_OEM +endif +subdir-ccflags-y += -DOS_NAME_UBUNTU_$(OS_VERSION_STR) +endif + +ifeq ($(OS_NAME),"rhel") +subdir-ccflags-y += -DOS_NAME_RHEL_$(OS_VERSION_STR) + +ifeq ($(RHEL_MAJOR),7) +subdir-ccflags-y += -DOS_NAME_RHEL_7_X \ + -include /usr/src/kernels/$(KERNELRELEASE)/include/drm/drm_backport.h +else ifeq ($(RHEL_MAJOR),8) +subdir-ccflags-y += -DOS_NAME_RHEL_8_X +endif +endif + +export OS_NAME OS_VERSION + +_KCL_LINUXINCLUDE=$(subst -I ,-I,$(strip $(LINUXINCLUDE))) +LINUX_SRCTREE_INCLUDE := \ + $(filter-out -I%/uapi "-include %/kconfig.h",$(_KCL_LINUXINCLUDE)) +USER_INCLUDE := $(filter-out $(LINUX_SRCTREE_INCLUDE), $(_KCL_LINUXINCLUDE)) + +LINUXINCLUDE := \ + -I$(src)/include \ + -I$(src)/include/kcl/header \ + -include $(src)/include/kcl/kcl_version.h \ + -include $(src)/include/rename_symbol.h \ + -include $(src)/amd/dkms/config/config.h \ + $(LINUX_SRCTREE_INCLUDE) \ + -I$(src)/include/uapi \ + $(USER_INCLUDE) + +export CONFIG_HSA_AMD=y +export CONFIG_DRM_TTM=m +export CONFIG_DRM_TTM_DMA_PAGE_POOL=y +export CONFIG_DRM_AMDGPU=m +export CONFIG_DRM_SCHED=m +export CONFIG_DRM_AMDGPU_CIK=y +export CONFIG_DRM_AMDGPU_SI=y +export CONFIG_DRM_AMDGPU_USERPTR=y +export CONFIG_DRM_AMD_DC=y + +subdir-ccflags-y += -DCONFIG_HSA_AMD +subdir-ccflags-y += -DCONFIG_DRM_TTM_DMA_PAGE_POOL +subdir-ccflags-y += -DCONFIG_DRM_AMDGPU_CIK +subdir-ccflags-y += -DCONFIG_DRM_AMDGPU_SI +subdir-ccflags-y += -DCONFIG_DRM_AMDGPU_USERPTR +subdir-ccflags-y += -DCONFIG_DRM_AMD_DC +subdir-ccflags-y += -Wno-error + +ifeq ($(call _is_kcl_macro_defined,HAVE_AMDKCL_HMM_MIRROR_ENABLED),y) +ifdef CONFIG_DEVICE_PRIVATE +export CONFIG_HSA_AMD_SVM=y +subdir-ccflags-y += -DCONFIG_HSA_AMD_SVM +endif +endif + +export CONFIG_DRM_AMD_DC_HDCP=y +subdir-ccflags-y += -DCONFIG_DRM_AMD_DC_HDCP + +ifeq (y,$(CONFIG_PCI_P2PDMA)) + ifeq (y,$(CONFIG_DMABUF_MOVENOTIFY)) + export CONFIG_HSA_AMD_P2P=y + subdir-ccflags-y += -DCONFIG_HSA_AMD_P2P + endif +endif + +# Trying to enable DCN2/3 with core2 optimizations will result in +# older versions of GCC hanging during building/installing. Check +# if the compiler is using core2 optimizations and only build DCN2/3 +# if core2 isn't in the compiler flags +ifndef CONFIG_ARM64 +ifeq ($(filter %core2, $(KBUILD_CFLAGS)),) +export CONFIG_DRM_AMD_DC_FP=y +subdir-ccflags-y += -DCONFIG_DRM_AMD_DC_FP +endif +endif + +# v5.17-rc4-3-ge8c07082a810 (Kbuild: move to -std=gnu11) +# Upstream patches now uses gnu11/gnu99 as the default C standard version. +# However, gcc in legacy OS still uses gnu89, which will introduce a standard +# build gap leading to a DKMS build failure possibly. So add below check to +# move gnu89 to gnu99 if KBUILD_CFLAGS still uses gnu89. 
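As a concrete illustration of the standards gap this check papers over (an editorial example, not from the patch), the following C is accepted under -std=gnu99 or -std=gnu11 but rejected by a gnu89 compiler, and it is exactly the kind of construct newer kernel sources rely on:

    /* gnu99_demo.c: builds with -std=gnu99/gnu11, fails with -std=gnu89 */
    #include <stdio.h>

    int main(void)
    {
        for (int i = 0; i < 3; i++)  /* loop-scoped declaration requires C99+ */
            printf("%d\n", i);
        return 0;
    }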
+ifeq ($(findstring gnu89,$(KBUILD_CFLAGS)),gnu89) +KBUILD_CFLAGS := $(subst gnu89,gnu99,$(KBUILD_CFLAGS)) +$(warning "The local C standard(gnu89) doesn't match kernel default C standard(gnu11/gnu99)") +endif + +include $(src)/amd/dkms/Makefile.drm_ttm_helper + +include $(src)/amd/dkms/Makefile.drm_buddy + +obj-m += scheduler/ amd/amdgpu/ amd/amdxcp/ ttm/ amd/amdkcl/ diff --git a/drivers/gpu/drm/amd/dkms/Makefile.compiler b/drivers/gpu/drm/amd/dkms/Makefile.compiler new file mode 100644 index 0000000000000..9c546ebcbee5a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/Makefile.compiler @@ -0,0 +1,3 @@ +ifndef gcc-min-version +gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y) +endif diff --git a/drivers/gpu/drm/amd/dkms/Makefile.config b/drivers/gpu/drm/amd/dkms/Makefile.config new file mode 100644 index 0000000000000..211fd393987e3 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/Makefile.config @@ -0,0 +1,21 @@ +dkmstree := drivers/gpu/drm/amd/dkms +srctree := $(subst /$(dkmstree),,$(realpath $(dir $(lastword $(MAKEFILE_LIST))))) + +all: config clean + +config: force + @( \ + cd $(srctree)/$(dkmstree); \ + ./autogen.sh; \ + ./configure \ + --enable-linux-builtin \ + --with-linux=$(srctree) \ + ) + +clean: force + @( \ + cd $(srctree)/$(dkmstree); \ + rm -rf aclocal.m4 config.* configure config/*.in* build_*\ + ) + +.PHONY: all force diff --git a/drivers/gpu/drm/amd/dkms/Makefile.drm_buddy b/drivers/gpu/drm/amd/dkms/Makefile.drm_buddy new file mode 100644 index 0000000000000..208c05b48758d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/Makefile.drm_buddy @@ -0,0 +1,6 @@ +export CONFIG_DRM_BUDDY=m + +subdir-ccflags-y += -DCONFIG_DRM_BUDDY +CFLAGS_drm_buddy.o += -DHAVE_CONFIG_H +amddrm_buddy-y := drm_buddy.o +obj-$(CONFIG_DRM_BUDDY) += amddrm_buddy.o diff --git a/drivers/gpu/drm/amd/dkms/Makefile.drm_ttm_helper b/drivers/gpu/drm/amd/dkms/Makefile.drm_ttm_helper new file mode 100644 index 0000000000000..b76db38d020a2 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/Makefile.drm_ttm_helper @@ -0,0 +1,8 @@ +export CONFIG_DRM_TTM_HELPER=m + +subdir-ccflags-y += -DCONFIG_DRM_TTM_HELPER +CFLAGS_drm_gem_ttm_helper.o += -include $(src)/ttm/backport/backport.h \ + -include $(src)/include/kcl/backport/kcl_drm_gem.h \ + -DHAVE_CONFIG_H +amddrm_ttm_helper-y := drm_gem_ttm_helper.o +obj-$(CONFIG_DRM_TTM_HELPER) += amddrm_ttm_helper.o diff --git a/drivers/gpu/drm/amd/dkms/autogen.sh b/drivers/gpu/drm/amd/dkms/autogen.sh new file mode 100755 index 0000000000000..d72f86b5a2f97 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +autoreconf -fiv +[[ $? -eq 0 ]] || exit $? +rm -Rf autom4te.cache diff --git a/drivers/gpu/drm/amd/dkms/config/config-amd-chips.h b/drivers/gpu/drm/amd/dkms/config/config-amd-chips.h new file mode 100644 index 0000000000000..9ff8bd1cb6a04 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/config/config-amd-chips.h @@ -0,0 +1,3 @@ +/* + * This file is managed by DKMS build. Do not edit. + */ diff --git a/drivers/gpu/drm/amd/dkms/config/config.h b/drivers/gpu/drm/amd/dkms/config/config.h new file mode 100644 index 0000000000000..57c340185ca23 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/config/config.h @@ -0,0 +1,1106 @@ +/* config/config.h. Generated from config.h.in by configure. */ +/* config/config.h.in. Generated from configure.ac by autoheader. 
*/ + +/* whether invalidate_range_start() wants 2 args */ +#define HAVE_2ARGS_INVALIDATE_RANGE_START 1 + +/* whether invalidate_range_start() wants 5 args */ +/* #undef HAVE_5ARGS_INVALIDATE_RANGE_START */ + +/* whether access_ok(x, x) is available */ +#define HAVE_ACCESS_OK_WITH_TWO_ARGUMENTS 1 + +/* acpi_dev_get_first_match_dev() is available */ +#define HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV 1 + +/* acpi_put_table() is available */ +#define HAVE_ACPI_PUT_TABLE 1 + +/* struct acpi_srat_generic_affinity is available */ +#define HAVE_ACPI_SRAT_GENERIC_AFFINITY 1 + +/* acpi_video_backlight_use_native() is available */ +#define HAVE_ACPI_VIDEO_BACKLIGHT_USE_NATIVE 1 + +/* acpi_video_register_backlight() is available */ +#define HAVE_ACPI_VIDEO_REGISTER_BACKLIGHT 1 + +/* whether AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES is defined */ +#define HAVE_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 1 + +/* *FLAGS_.o support to take the path relative to $(obj) */ +#define HAVE_AMDKCL_FLAGS_TAKE_PATH 1 + +/* hmm support is enabled */ +#define HAVE_AMDKCL_HMM_MIRROR_ENABLED 1 + +/* amd_iommu_invalidate_ctx take arg type of pasid as u32 */ +/* #undef HAVE_AMD_IOMMU_INVALIDATE_CTX_PASID_U32 */ + +/* amd_iommu_pc_get_max_banks() declared */ +/* #undef HAVE_AMD_IOMMU_PC_GET_MAX_BANKS_DECLARED */ + +/* amd_iommu_pc_get_max_banks() arg is unsigned int */ +/* #undef HAVE_AMD_IOMMU_PC_GET_MAX_BANKS_UINT */ + +/* amd_iommu_pc_supported() is available */ +/* #undef HAVE_AMD_IOMMU_PC_SUPPORTED */ + +/* apple_gmux_detect() is available */ +#define HAVE_APPLE_GMUX_DETECT 1 + +/* arch_io_{reserve/free}_memtype_wc() are available */ +#define HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ASM_FPU_API_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_ASM_SET_MEMORY_H 1 + +/* __assign_str() wants 1 arguments */ +#define HAVE_ASSIGN_STR_ONE_ARGUMENT 1 + +/* amdgpu_attr_group->is_bin_visible is available */ +#define HAVE_ATTRIBUTE_GROUP_IS_BIN_VISIBLE 1 + +/* backlight_device_set_brightness() is available */ +#define HAVE_BACKLIGHT_DEVICE_SET_BRIGHTNESS 1 + +/* bitmap_free(),bitmap_alloc(),bitmap_zalloc is available */ +#define HAVE_BITMAP_FUNCS 1 + +/* bitmap_to_arr32() is available */ +#define HAVE_BITMAP_TO_ARR32 1 + +/* cancel_work() is available */ +#define HAVE_CANCEL_WORK 1 + +/* whether CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL is defined */ +#define HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL 1 + +/* compat_ptr_ioctl() is available */ +#define HAVE_COMPAT_PTR_IOCTL 1 + +/* cpuinfo_x86.topo is available */ +#define HAVE_CPUINFO_TOPOLOGY_IN_CPUINFO_X86_STRUCT 1 + +/* drm_edid_raw() is available */ +#define HAVE_CRTC_DRM_VBLANK_CRTC 1 + +/* debugfs_create_file_size() is available */ +#define HAVE_DEBUGFS_CREATE_FILE_SIZE 1 + +/* kobj_type->default_groups is available */ +#define HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE 1 + +/* devcgroup_check_permission() is available */ +#define HAVE_DEVCGROUP_CHECK_PERMISSION 1 + +/* MEMORY_DEVICE_COHERENT is availablea */ +#define HAVE_DEVICE_COHERENT 1 + +/* dev_is_removable() is available */ +#define HAVE_DEV_IS_REMOVABLE 1 + +/* dev_pagemap->owner is available */ +#define HAVE_DEV_PAGEMAP_OWNER 1 + +/* there is 'range' field within dev_pagemap structure */ +#define HAVE_DEV_PAGEMAP_RANGE 1 + +/* dev_pm_set_driver_flags() is available */ +#define HAVE_DEV_PM_SET_DRIVER_FLAGS 1 + +/* dma_buf_is_dynamic() is available */ +#define HAVE_DMA_BUF_IS_DYNAMIC 1 + +/* dma_buf->dynamic_mapping is available */ +/* #undef HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING */ + +/* dma_buf->dynamic_mapping is not available */ +/* #undef HAVE_DMA_BUF_OPS_LEGACY */ + +/* dma_fence_chain_alloc() is available */ +#define HAVE_DMA_FENCE_CHAIN_ALLOC 1 + +/* dma_fence_chain_contained() is available */ +#define HAVE_DMA_FENCE_CHAIN_CONTAINED 1 + +/* dma_fence_describe() is available */ +#define HAVE_DMA_FENCE_DESCRIBE 1 + +/* dma_fence_is_container() is available */ +#define HAVE_DMA_FENCE_IS_CONTAINER 1 + +/* dma_fence_is_later_or_same() is available */ +#define HAVE_DMA_FENCE_IS_LATER_OR_SAME 1 + +/* struct dma_fence_ops has callback set_deadline */ +#define HAVE_DMA_FENCE_OPS_SET_DEADLINE 1 + +/* struct dma_fence_ops has use_64bit_seqno field */ +#define HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO 1 + +/* dma_fence_TIMESTAMP() is available */ +#define HAVE_DMA_FENCE_TIMESTAMP 1 + +/* dma_map_sgtable() is enabled */ +#define HAVE_DMA_MAP_SGTABLE 1 + +/* dma_resv->fences is available */ +#define HAVE_DMA_RESV_FENCES 1 + +/* dma_resv->seq is available */ +/* #undef HAVE_DMA_RESV_SEQ */ + +/* dma_resv->seq is seqcount_ww_mutex_t */ +/* #undef HAVE_DMA_RESV_SEQCOUNT_WW_MUTEX_T */ + +/* Reporting dma_resv->seq bug */ +/* #undef HAVE_DMA_RESV_SEQ_BUG */ + +/* down_read_killable() is available */ +#define HAVE_DOWN_READ_KILLABLE 1 + +/* down_write_killable() is available */ +#define HAVE_DOWN_WRITE_KILLABLE 1 + +/* drm_add_override_edid_modes() is available */ +/* #undef HAVE_DRM_ADD_OVERRIDE_EDID_MODES */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DRM_AMDGPU_PCIID_H */ + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_DRM_AMD_ASIC_TYPE_H 1 + +/* drm_aperture_remove_conflicting_pci_framebuffers() second arg is + drm_driver* */ +#define HAVE_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_DRM_DRIVER_ARG 1 + +/* drm_atomic_helper_calc_timestamping_constants() is available */ +#define HAVE_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS 1 + +/* drm_atomic_plane_enabling() is available */ +#define HAVE_DRM_ATOMIC_PLANE_ENABLING 1 + +/* drm_atomic_private_obj_init() wants 4 args */ +#define HAVE_DRM_ATOMIC_PRIVATE_OBJ_INIT_4ARGS 1 + +/* drm_client_register() is available */ +#define HAVE_DRM_CLIENT_REGISTER 1 + +/* struct drm_color_ctm_3x4 is available */ +/* #undef HAVE_DRM_COLOR_CTM_3X4 */ + +/* drm_connector_atomic_hdr_metadata_equal() is available */ +#define HAVE_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL 1 + +/* drm_connector_attach_hdr_output_metadata_property() is available */ +#define HAVE_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY 1 + +/* drm_connector->edid_override is available */ +#define HAVE_DRM_CONNECTOR_EDID_OVERRIDE 1 + +/* drm_connector_for_each_possible_encoder() wants 2 arguments */ +#define HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS 1 + +/* atomic_best_encoder take 2nd arg type of state as struct drm_atomic_state + */ +#define HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_BEST_ENCODER_ARG_DRM_ATOMIC_STATE 1 + +/* drm_connector_helper_funcs->atomic_check() wants struct drm_atomic_state + arg */ +#define HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE 1 + +/* drm_connector_helper_funcs->prepare_writeback_job is available */ +#define HAVE_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB 1 + +/* drm_connector_init_with_ddc() is available */ +#define HAVE_DRM_CONNECTOR_INIT_WITH_DDC 1 + +/* drm_connector_set_panel_orientation_with_quirk() is available */ +#define HAVE_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK 1 + +/* drm_connector_state->colorspace is available */ +#define HAVE_DRM_CONNECTOR_STATE_COLORSPACE 1 + +/* struct drm_connector_state has hdcp_content_type member */ +#define HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE 1 + +/* struct drm_connector_state has hdr_output_metadata member */ +#define HAVE_DRM_CONNECTOR_STATE_HDR_OUTPUT_METADATA 1 + +/* drm_connector_attach_colorspace_property() is available */ +#define HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY 1 + +/* drm_crtc_helper_funcs->atomic_check()/atomic_flush()/atomic_begin() wants + struct drm_atomic_state arg */ +#define HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE 1 + +/* drm_crtc_helper_funcs->atomic_enable()/atomic_disable() wants struct + drm_atomic_state arg */ +#define HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE_ARG_DRM_ATOMIC_STATE 1 + +/* drm_dbg_printer() is available */ +#define HAVE_DRM_DBG_PRINTER 1 + +/* enum drm_debug_category is available */ +#define HAVE_DRM_DEBUG_CATEGORY 1 + +/* drm_debug_enabled() is available */ +#define HAVE_DRM_DEBUG_ENABLED 1 + +/* drm_device->open_count is int */ +/* #undef HAVE_DRM_DEVICE_OPEN_COUNT_INT */ + +/* struct drm_device has pdev member */ +/* #undef HAVE_DRM_DEVICE_PDEV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_DP_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_DP_HELPER_H 1 + +/* Define to 1 if you have the header file. + */ +#define HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_DSC_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_DRM_DISPLAY_DRM_DSC_HELPER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_HDCP_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_HDCP_HELPER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DISPLAY_DRM_HDMI_HELPER_H 1 + +/* display_info->edid_hdmi_rgb444_dc_modes is available */ +#define HAVE_DRM_DISPLAY_INFO_EDID_HDMI_RGB444_DC_MODES 1 + +/* display_info->is_hdmi is available */ +#define HAVE_DRM_DISPLAY_INFO_IS_HDMI 1 + +/* display_info->luminance_range is available */ +#define HAVE_DRM_DISPLAY_INFO_LUMINANCE_RANGE 1 + +/* display_info->max_dsc_bpp is available */ +#define HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP 1 + +/* struct drm_display_info has monitor_range member */ +#define HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE 1 + +/* drm_dp_add_payload_part2 has three arguments */ +/* #undef HAVE_DRM_DP_ADD_PAYLOAD_PART2_THREE_ARGUMENTS */ + +/* drm_dp_atomic_find_time_slots() is available */ +#define HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS 1 + +/* drm_dp_atomic_find_vcpi_slots() wants 5args */ +/* #undef HAVE_DRM_DP_ATOMIC_FIND_VCPI_SLOTS_5ARGS */ + +/* drm_dp_atomic_release_time_slots() is available */ +#define HAVE_DRM_DP_ATOMIC_RELEASE_TIME_SLOTS 1 + +/* drm_dp_atomic_release_vcpi_slots() with drm_dp_mst_port argument is + available */ +/* #undef HAVE_DRM_DP_ATOMIC_RELEASE_VCPI_SLOTS_MST_PORT */ + +/* drm_dp_mst_atomic_setup_commit() is available */ +#define HAVE_DRM_DP_ATOMIC_SETUP_COMMIT 1 + +/* drm_dp_mst_atomic_wait_for_dependencies() is available */ +#define HAVE_DRM_DP_ATOMIC_WAIT_FOR_DEPENDENCIES 1 + +/* struct drm_dp_aux has member named 'drm_dev' */ +#define HAVE_DRM_DP_AUX_DRM_DEV 1 + +/* drm_dp_calc_pbn_mode() wants 3 args */ +/* #undef HAVE_DRM_DP_CALC_PBN_MODE_3ARGS */ + +/* drm_dp_cec_register_connector() wants p,p interface */ +#define HAVE_DRM_DP_CEC_REGISTER_CONNECTOR_PP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DRM_DP_DRM_DP_HELPER_H */ + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_DRM_DP_DRM_DP_MST_HELPER_H */ + +/* drm_dp_link_train_channel_eq_delay() has 2 args */ +#define HAVE_DRM_DP_LINK_TRAIN_CHANNEL_EQ_DELAY_2ARGS 1 + +/* drm_dp_link_train_clock_recovery_delay() has 2 args */ +#define HAVE_DRM_DP_LINK_TRAIN_CLOCK_RECOVERY_DELAY_2ARGS 1 + +/* drm_dp_mst_add_affected_dsc_crtcs() is available */ +#define HAVE_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS 1 + +/* drm_dp_mst_atomic_check() is available */ +#define HAVE_DRM_DP_MST_ATOMIC_CHECK 1 + +/* drm_dp_mst_atomic_enable_dsc() is available */ +#define HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC 1 + +/* drm_dp_mst_atomic_enable_dsc() wants 5args */ +/* #undef HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC_WITH_5_ARGS */ + +/* the guid of struct drm_dp_mst_branch is guid_t */ +#define HAVE_DRM_DP_MST_BRANCH_GUID_T 1 + +/* drm_dp_mst_connector_early_unregister() is available */ +#define HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER 1 + +/* drm_dp_mst_connector_late_register() is available */ +#define HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER 1 + +/* drm_dp_mst_detect_port() wants p,p,p,p args */ +#define HAVE_DRM_DP_MST_DETECT_PORT_PPPP 1 + +/* drm_dp_mst_dsc_aux_for_port() is available */ +#define HAVE_DRM_DP_MST_DSC_AUX_FOR_PORT 1 + +/* drm_dp_mst_{get,put}_port_malloc() is available */ +#define HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC 1 + +/* drm_dp_mst_hpd_irq_handle_event() is available */ +#define HAVE_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT 1 + +/* drm_dp_mst_port struct has full_pbn member */ +#define HAVE_DRM_DP_MST_PORT_FULL_PBN 1 + +/* struct drm_dp_mst_port has passthrough_aux member */ +#define HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX 1 + +/* drm_dp_mst_root_conn_atomic_check() is available */ +#define HAVE_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK 1 + +/* struct drm_dp_mst_topology_cbs->destroy_connector is available */ +/* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR */ + +/* struct drm_dp_mst_topology_cbs has hotplug member */ +/* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG */ + +/* struct drm_dp_mst_topology_cbs->poll_hpd_irq is available */ +#define HAVE_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ 1 + +/* struct drm_dp_mst_topology_cbs->register_connector is available */ +/* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR */ + +/* struct drm_dp_mst_topology_mgr.base is available */ +#define HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE 1 + +/* drm_dp_mst_topology_mgr_init() has max_lane_count and max_link_rate */ +/* #undef HAVE_DRM_DP_MST_TOPOLOGY_MGR_INIT_MAX_LANE_COUNT */ + +/* drm_dp_mst_topology_mgr_resume() wants 2 args */ +#define HAVE_DRM_DP_MST_TOPOLOGY_MGR_RESUME_2ARGS 1 + +/* struct drm_dp_mst_topology_state has member payloads */ +#define HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS 1 + +/* struct drm_dp_mst_topology_state has member pbn_div */ +/* #undef HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT */ + +/* struct drm_dp_mst_topology_state has union member pbn_div */ +#define HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION 1 + +/* struct drm_dp_mst_topology_state has member total_avail_slots */ +#define HAVE_DRM_DP_MST_TOPOLOGY_STATE_TOTAL_AVAIL_SLOTS 1 + +/* drm_dp_read_dpcd_caps() is available */ +#define HAVE_DRM_DP_READ_DPCD_CAPS 1 + +/* drm_dp_remove_payload_part{1,2}() is available */ +#define HAVE_DRM_DP_REMOVE_RAYLOAD_PART 1 + +/* drm_dp_send_real_edid_checksum() is available */ +#define HAVE_DRM_DP_SEND_REAL_EDID_CHECKSUM 1 + +/* drm_dp_update_payload_part1() function has start_slot argument */ +/* #undef HAVE_DRM_DP_UPDATE_PAYLOAD_PART1_START_SLOT_ARG */ + +/* drm_driver->gem_prime_mmap() is available */ +/* #undef 
HAVE_DRM_DRIVER_GEM_PRIME_MMAP */ + +/* drm_driver->gem_prime_res_obj() is available */ +/* #undef HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ */ + +/* drm_driver->show_fdinfo() is available */ +#define HAVE_DRM_DRIVER_SHOW_FDINFO 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DRM_DRMP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_APERTURE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DRM_DRM_BACKPORT_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_ELD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_EXEC_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DRM_DRM_FBDEV_GENERIC_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_FBDEV_TTM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_GEM_ATOMIC_HELPER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_MANAGED_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_PROBE_HELPER_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DRM_DRM_SUBALLOC_H 1 + +/* drm_driver_feature DRIVER_IRQ_SHARED is available */ +/* #undef HAVE_DRM_DRV_DRIVER_IRQ_SHARED */ + +/* drm_driver_feature DRIVER_PRIME is available */ +/* #undef HAVE_DRM_DRV_DRIVER_PRIME */ + +/* drm_driver_feature DRIVER_SYNCOBJ_TIMELINE is available */ +#define HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE 1 + +/* drm_gem_prime_export() with p,i arg is available */ +#define HAVE_DRM_DRV_GEM_PRIME_EXPORT_PI 1 + +/* drm_dsc_compute_rc_parameters() is available */ +#define HAVE_DRM_DSC_COMPUTE_RC_PARAMETERS 1 + +/* struct drm_dsc_config has member simple_422 */ +#define HAVE_DRM_DSC_CONFIG_SIMPLE_422 1 + +/* drm_dsc_pps_payload_pack() is available */ +#define HAVE_DRM_DSC_PPS_PAYLOAD_PACK 1 + +/* drm_edid_alloc() is available */ +#define HAVE_DRM_EDID_MALLOC 1 + +/* drm_edid_override_connector_update() is available */ +#define HAVE_DRM_EDID_OVERRIDE_CONNECTOR_UPDATE 1 + +/* drm_edid_raw() is available */ +#define HAVE_DRM_EDID_RAW 1 + +/* drm_edid_valid() is available */ +#define HAVE_DRM_EDID_VALID 1 + +/* drm_exec() has 3 arguments */ +#define HAVE_DRM_EXEC_INIT_3_ARGUMENTS 1 + +/* drm_fb_helper_fill_info() is available */ +#define HAVE_DRM_FB_HELPER_FILL_INFO 1 + +/* drm_fb_helper_init() has 2 args */ +#define HAVE_DRM_FB_HELPER_INIT_2ARGS 1 + +/* drm_fb_helper_init() has 3 args */ +/* #undef HAVE_DRM_FB_HELPER_INIT_3ARGS */ + +/* drm_firmware_drivers_only() is available */ +#define HAVE_DRM_FIRMWARE_DRIVERS_ONLY 1 + +/* drm_format_info.block_w and rm_format_info.block_h is available */ +#define HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED 1 + +/* drm_gem_object_funcs->vmap() has 2 args */ +#define HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_2ARGS 1 + +/* drm_gem_object_funcs.vmap hsa iosys_map arg */ +#define HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG 1 + +/* drm_gem_object_is_shared_for_memory_stats() is available */ +#define HAVE_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS 1 + +/* drm_gem_object_put() is available */ +#define HAVE_DRM_GEM_OBJECT_PUT 1 + +/* drm_gem_object_put() is exported */ +/* #undef HAVE_DRM_GEM_OBJECT_PUT_SYMBOL */ + +/* drm_gem_object->resv/_resv is available */ +#define HAVE_DRM_GEM_OBJECT_RESV 1 + +/* drm_gem_plane_helper_prepare_fb() is available */ +#define HAVE_DRM_GEM_PLANE_HELPER_PREPARE_FB 1 + +/* drm_gem_prime_handle_to_dmabuf() is available */ +#define HAVE_DRM_GEM_PRIME_HANDLE_TO_DMABUF 1 + +/* 
drm_gem_prime_handle_to_fd() is available */ +#define HAVE_DRM_GEM_PRIME_HANDLE_TO_FD 1 + +/* drm_hdcp_update_content_protection is available */ +#define HAVE_DRM_HDCP_UPDATE_CONTENT_PROTECTION 1 + +/* drm_hdmi_avi_infoframe_from_display_mode() has p,p,b interface */ +/* #undef HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B */ + +/* drm_hdmi_avi_infoframe_from_display_mode() has p,p,p interface */ +#define HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P 1 + +/* drm_helper_force_disable_all() is available */ +#define HAVE_DRM_HELPER_FORCE_DISABLE_ALL 1 + +/* drm_kms_helper_connector_hotplug_event() function is available */ +#define HAVE_DRM_KMS_HELPER_CONNECTOR_HOTPLUG_EVENT 1 + +/* drm_memcpy_from_wc() is available */ +/* #undef HAVE_DRM_MEMCPY_FROM_WC */ + +/* drm_memcpy_from_wc() is available and has struct iosys_map* arg */ +#define HAVE_DRM_MEMCPY_FROM_WC_IOSYS_MAP_ARG 1 + +/* drm_mode_config->dp_subconnector_property is available */ +#define HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY 1 + +/* drm_mode_config->fb_base is available */ +/* #undef HAVE_DRM_MODE_CONFIG_FB_BASE */ + +/* drm_mode_config->fb_modifiers_not_supported is available */ +#define HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED 1 + +/* drm_mode_create_dp_colorspace_property() has 2 args */ +#define HAVE_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_2ARGS 1 + +/* drm_mode_create_hdmi_colorspace_property() has 2 args */ +#define HAVE_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY_2ARGS 1 + +/* drm_mode_init() is available */ +#define HAVE_DRM_MODE_INIT 1 + +/* drm_need_swiotlb() is available */ +#define HAVE_DRM_NEED_SWIOTLB 1 + +/* drm_plane_helper_destroy() is available */ +#define HAVE_DRM_PLANE_HELPER_DESTROY 1 + +/* drm_prime_pages_to_sg() wants 3 arguments */ +#define HAVE_DRM_PRIME_PAGES_TO_SG_3ARGS 1 + +/* drm_prime_sg_to_dma_addr_array() is available */ +#define HAVE_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY 1 + +/* drm_print_bits() is available */ +#define HAVE_DRM_PRINT_BITS 1 + +/* drm_print_bits() has 4 args */ +#define HAVE_DRM_PRINT_BITS_4ARGS 1 + +/* drm_show_fdinfo() is available */ +#define HAVE_DRM_SHOW_FDINFO 1 + +/* drm_simple_encoder_init() is available */ +#define HAVE_DRM_SIMPLE_ENCODER_INIT 1 + +/* Has function drm_suballoc_manager_init() */ +#define HAVE_DRM_SUBALLOC_MANAGER_INIT 1 + +/* Define to 1 if you have the <drm/task_barrier.h> header file.
*/ +#define HAVE_DRM_TASK_BARRIER_H 1 + +/* drm_vblank_crtc_config is available */ +/* #undef HAVE_DRM_VBLANK_CRTC_CONFIG */ + +/* struct drm_vma_offset_node has readonly field */ +/* #undef HAVE_DRM_VMA_OFFSET_NODE_READONLY_FIELD */ + +/* drm_writeback_connector_init() has 7 args */ +#define HAVE_DRM_WRITEBACK_CONNECTOR_INIT_7_ARGS 1 + +/* fault_flag_allow_retry_first() is available */ +#define HAVE_FAULT_FLAG_ALLOW_RETRY_FIRST 1 + +/* follow_pfn() is available */ +/* #undef HAVE_FOLLOW_PFN */ + +/* fsleep() is available */ +#define HAVE_FSLEEP 1 + +/* fs_reclaim_acquire() is available */ +#define HAVE_FS_RECLAIM_ACQUIRE 1 + +/* generic_handle_domain_irq() is available */ +#define HAVE_GENERIC_HANDLE_DOMAIN_IRQ 1 + +/* get_user_pages() wants 6 args */ +/* #undef HAVE_GET_USER_PAGES_6ARGS */ + +/* get_user_pages() wants gup_flags parameter */ +/* #undef HAVE_GET_USER_PAGES_GUP_FLAGS */ + +/* get_user_pages_remote() wants gup_flags parameter */ +/* #undef HAVE_GET_USER_PAGES_REMOTE_GUP_FLAGS */ + +/* get_user_pages_remote() was introduced with its initial prototype */ +/* #undef HAVE_GET_USER_PAGES_REMOTE_INTRODUCED */ + +/* get_user_pages_remote() wants locked parameter */ +/* #undef HAVE_GET_USER_PAGES_REMOTE_LOCKED */ + +/* get_user_pages_remote() removed the task_struct pointer */ +/* #undef HAVE_GET_USER_PAGES_REMOTE_REMOVE_TASK_STRUCT */ + +/* get_user_pages_remote() removed the vmas argument */ +#define HAVE_GET_USER_PAGES_REMOTE_REMOVE_VMAS 1 + +/* get_user_pages() removed the vmas argument */ +#define HAVE_GET_USER_PAGES_REMOVE_VMAS 1 + +/* drm_connector_hdr_sink_metadata() is available */ +#define HAVE_HDR_SINK_METADATA 1 + +/* hmm removed the customizable pfn format */ +#define HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT 1 + +/* hmm_range_fault() wants 1 arg */ +#define HAVE_HMM_RANGE_FAULT_1ARG 1 + +/* hypervisor_is_type() is available */ +#define HAVE_HYPERVISOR_IS_TYPE 1 + +/* struct i2c_lock_operations is defined */ +#define HAVE_I2C_LOCK_OPERATIONS_STRUCT 1 + +/* i2c_new_client_device() is enabled */ +#define HAVE_I2C_NEW_CLIENT_DEVICE 1 + +/* idr_init_base() is available */ +#define HAVE_IDR_INIT_BASE 1 + +/* idr_remove() returns a void pointer */ +#define HAVE_IDR_REMOVE_RETURN_VOID_POINTER 1 + +/* import_guid() is available */ +#define HAVE_IMPORT_GUID 1 + +/* in_compat_syscall is defined */ +#define HAVE_IN_COMPAT_SYSCALL 1 + +/* io_mapping_map_local_wc() is available */ +#define HAVE_IO_MAPPING_MAP_LOCAL_WC 1 + +/* io_mapping_unmap_local() is available */ +#define HAVE_IO_MAPPING_UNMAP_LOCAL 1 + +/* is_cow_mapping() is available */ +#define HAVE_IS_COW_MAPPING 1 + +/* jiffies64_to_msecs() is available */ +#define HAVE_JIFFIES64_TO_MSECS 1 + +/* kallsyms_lookup_name is available */ +/* #undef HAVE_KALLSYMS_LOOKUP_NAME */ + +/* close_fd() is available */ +#define HAVE_KERNEL_CLOSE_FD 1 + +/* kernel_write() takes the position argument as a pointer */ +#define HAVE_KERNEL_WRITE_PPOS 1 + +/* kfifo_put() has a non-pointer parameter */ +#define HAVE_KFIFO_PUT_NON_POINTER 1 + +/* kmalloc_size_roundup is available */ +#define HAVE_KMALLOC_SIZE_ROUNDUP 1 + +/* kmap_local_* is available */ +#define HAVE_KMAP_LOCAL 1 + +/* krealloc_array() is available */ +#define HAVE_KREALLOC_ARRAY 1 + +/* ksys_close() is available */ +/* #undef HAVE_KSYS_CLOSE_FD */ + +/* ksys_sync_helper() is available */ +#define HAVE_KSYS_SYNC_HELPER 1 + +/* kthread_{use,unuse}_mm() is available */ +#define HAVE_KTHREAD_USE_MM 1 + +/* ktime_get_boottime_ns() is available */ +#define HAVE_KTIME_GET_BOOTTIME_NS 1 + +/* ktime_get_ns is available */
+#define HAVE_KTIME_GET_NS 1 + +/* ktime_get_raw_ns is available */ +#define HAVE_KTIME_GET_RAW_NS 1 + +/* ktime_t is union */ +/* #undef HAVE_KTIME_IS_UNION */ + +/* kvrealloc() is available */ +#define HAVE_KVREALLOC 1 + +/* Define to 1 if you have the <linux/acpi_amd_wbrf.h> header file. */ +#define HAVE_LINUX_ACPI_AMD_WBRF_H 1 + +/* Define to 1 if you have the <linux/apple-gmux.h> header file. */ +#define HAVE_LINUX_APPLE_GMUX_H 1 + +/* atomic_long_try_cmpxchg() is available */ +#define HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG 1 + +/* Define to 1 if you have the <linux/bits.h> header file. */ +#define HAVE_LINUX_BITS_H 1 + +/* Define to 1 if you have the <linux/build_bug.h> header file. */ +#define HAVE_LINUX_BUILD_BUG_H 1 + +/* Define to 1 if you have the <linux/cc_platform.h> header file. */ +#define HAVE_LINUX_CC_PLATFORM_H 1 + +/* Define to 1 if you have the <linux/compiler_attributes.h> header file. */ +#define HAVE_LINUX_COMPILER_ATTRIBUTES_H 1 + +/* Define to 1 if you have the <linux/container_of.h> header file. */ +#define HAVE_LINUX_CONTAINER_OF_H 1 + +/* Define to 1 if you have the <linux/device/class.h> header file. */ +#define HAVE_LINUX_DEVICE_CLASS_H 1 + +/* Define to 1 if you have the <linux/dma-attrs.h> header file. */ +/* #undef HAVE_LINUX_DMA_ATTRS_H */ + +/* Define to 1 if you have the <linux/dma-buf-map.h> header file. */ +/* #undef HAVE_LINUX_DMA_BUF_MAP_H */ + +/* Define to 1 if you have the <linux/dma-fence-chain.h> header file. */ +#define HAVE_LINUX_DMA_FENCE_CHAIN_H 1 + +/* Define to 1 if you have the <linux/dma-map-ops.h> header file. */ +#define HAVE_LINUX_DMA_MAP_OPS_H 1 + +/* Define to 1 if you have the <linux/dma-resv.h> header file. */ +#define HAVE_LINUX_DMA_RESV_H 1 + +/* Define to 1 if you have the <linux/fence-array.h> header file. */ +/* #undef HAVE_LINUX_FENCE_ARRAY_H */ + +/* Define to 1 if you have the <linux/iosys-map.h> header file. */ +#define HAVE_LINUX_IOSYS_MAP_H 1 + +/* Define to 1 if you have the <linux/io-64-nonatomic-lo-hi.h> header file. */ +#define HAVE_LINUX_IO_64_NONATOMIC_LO_HI_H 1 + +/* local_try_cmpxchg() is available */ +#define HAVE_LINUX_LOCAL_TRY_CMPXCHG 1 + +/* Define to 1 if you have the <linux/mmap_lock.h> header file. */ +#define HAVE_LINUX_MMAP_LOCK_H 1 + +/* Define to 1 if you have the <linux/pci-p2pdma.h> header file. */ +#define HAVE_LINUX_PCI_P2PDMA_H 1 + +/* Define to 1 if you have the <linux/pgtable.h> header file. */ +#define HAVE_LINUX_PGTABLE_H 1 + +/* Define to 1 if you have the <linux/processor.h> header file. */ +#define HAVE_LINUX_PROCESSOR_H 1 + +/* Define to 1 if you have the <linux/stdarg.h> header file. */ +#define HAVE_LINUX_STDARG_H 1 + +/* Define to 1 if you have the <linux/units.h> header file. */ +#define HAVE_LINUX_UNITS_H 1 + +/* Define to 1 if you have the <linux/xarray.h> header file.
*/ +#define HAVE_LINUX_XARRAY_H 1 + +/* list_cmp_func() takes const params */ +#define HAVE_LIST_CMP_FUNC_IS_CONST_PARAM 1 + +/* list_is_first() is available */ +#define HAVE_LIST_IS_FIRST 1 + +/* list_rotate_to_front() is available */ +#define HAVE_LIST_ROTATE_TO_FRONT 1 + +/* enum MCE_PRIO_UC is available */ +#define HAVE_MCE_PRIO_UC 1 + +/* memalloc_nofs_{save,restore}() are available */ +#define HAVE_MEMALLOC_NOFS_SAVE 1 + +/* memalloc_noreclaim_save() is available */ +#define HAVE_MEMALLOC_NORECLAIM_SAVE 1 + +/* migrate_disable() is available */ +#define HAVE_MIGRATE_DISABLE 1 + +/* struct migrate_vma has fault_page */ +#define HAVE_MIGRATE_VMA_FAULT_PAGE 1 + +/* migrate_vma->pgmap_owner is available */ +#define HAVE_MIGRATE_VMA_PGMAP_OWNER 1 + +/* mmap_assert_write_locked() is available */ +#define HAVE_MMAP_ASSERT_WRITE_LOCKED 1 + +/* mmput_async() is available */ +#define HAVE_MMPUT_ASYNC 1 + +/* mmu_notifier_call_srcu() is available */ +/* #undef HAVE_MMU_NOTIFIER_CALL_SRCU */ + +/* mmu_notifier_put() is available */ +#define HAVE_MMU_NOTIFIER_PUT 1 + +/* mmu_notifier_range_blockable() is available */ +#define HAVE_MMU_NOTIFIER_RANGE_BLOCKABLE 1 + +/* mmu_notifier_synchronize() is available */ +#define HAVE_MMU_NOTIFIER_SYNCHRONIZE 1 + +/* release_pages() wants 2 args */ +#define HAVE_MM_RELEASE_PAGES_2ARGS 1 + +/* class_create() has one argument */ +#define HAVE_ONE_ARGUMENT_OF_CLASS_CREATE 1 + +/* pcie_aspm_enabled() is available */ +#define HAVE_PCIE_ASPM_ENABLED 1 + +/* PCI driver handles extended tags */ +#define HAVE_PCI_CONFIGURE_EXTENDED_TAGS 1 + +/* pci_dev_id() is available */ +#define HAVE_PCI_DEV_ID 1 + +/* struct pci_dev->ltr_path is available */ +#define HAVE_PCI_DEV_LTR_PATH 1 + +/* struct pci_driver has field dev_groups */ +#define HAVE_PCI_DRIVER_DEV_GROUPS 1 + +/* pci_get_base_class() is available */ +#define HAVE_PCI_GET_BASE_CLASS 1 + +/* pci_pr3_present() is available */ +#define HAVE_PCI_PR3_PRESENT 1 + +/* pci_rebar_bytes_to_size() is available */ +#define HAVE_PCI_REBAR_BYTES_TO_SIZE 1 + +/* pci_upstream_bridge() is available */ +#define HAVE_PCI_UPSTREAM_BRIDGE 1 + +/* pfn_t is defined */ +#define HAVE_PFN_T 1 + +/* vm_insert_mixed() wants pfn_t arg */ +/* #undef HAVE_PFN_T_VM_INSERT_MIXED */ + +/* PIDTYPE_TGID is available */ +#define HAVE_PIDTYPE_TGID 1 + +/* pm_suspend_target_state is available */ +#define HAVE_PM_SUSPEND_TARGET_STATE 1 + +/* pm_suspend_via_firmware() is available */ +#define HAVE_PM_SUSPEND_VIA_FIRMWARE 1 + +/* pxm_to_node() is available */ +#define HAVE_PXM_TO_NODE 1 + +/* queue_work_node() is available */ +#define HAVE_QUEUE_WORK_NODE 1 + +/* radix_tree_iter_delete() is available */ +#define HAVE_RADIX_TREE_ITER_DELETE 1 + +/* rb_add_cached is available */ +#define HAVE_RB_ADD_CACHED 1 + +/* whether register_shrinker(x, x) is available */ +/* #undef HAVE_REGISTER_SHRINKER_WITH_TWO_ARGUMENTS */ + +/* remove_conflicting_pci_framebuffers() wants p,p args */ +/* #undef HAVE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PP */ + +/* seq_hex_dump() is available */ +#define HAVE_SEQ_HEX_DUMP 1 + +/* shrinker_register() is available */ +#define HAVE_SHRINKER_REGISTER 1 + +/* smca_get_bank_type(x) is available */ +/* #undef HAVE_SMCA_GET_BANK_TYPE_WITH_ONE_ARGUMENT */ + +/* whether smca_get_bank_type(x, x) is available */ +#define HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS 1 + +/* enum SMCA_UMC_V2 is available */ +#define HAVE_SMCA_UMC_V2 1 + +/* struct dma_buf_attach_ops->allow_peer2peer is available */ +#define HAVE_STRUCT_DMA_BUF_ATTACH_OPS_ALLOW_PEER2PEER 1
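+
+/* Two minimal usage sketches (illustrative only, not emitted by configure).
+ * First, the HAVE_LINUX_*_H results above typically pick between old and
+ * new include paths for one API, e.g. iosys_map, renamed from dma_buf_map
+ * in v5.18:
+ *
+ *   #ifdef HAVE_LINUX_IOSYS_MAP_H
+ *   #include <linux/iosys-map.h>
+ *   #else
+ *   #include <linux/dma-buf-map.h>
+ *   #define iosys_map dma_buf_map
+ *   #endif
+ *
+ * Second, function-shaped results are usually wrapped in compat helpers
+ * (the name kcl_vm_flags_set is hypothetical; HAVE_VM_FLAGS_SET is defined
+ * further below):
+ *
+ *   static inline void kcl_vm_flags_set(struct vm_area_struct *vma,
+ *                                       vm_flags_t flags)
+ *   {
+ *   #ifdef HAVE_VM_FLAGS_SET
+ *           vm_flags_set(vma, flags);    // v6.3+ locked accessor
+ *   #else
+ *           vma->vm_flags |= flags;      // direct update on older kernels
+ *   #endif
+ *   }
+ */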
+ +/* struct dma_buf_ops->pin() is available */ +#define HAVE_STRUCT_DMA_BUF_OPS_PIN 1 + +/* struct dma_fence_chain is available */ +#define HAVE_STRUCT_DMA_FENCE_CHAIN 1 + +/* struct drm_connector_state->duplicated is available */ +#define HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED 1 + +/* struct drm_connector_state->colorspace is available */ +#define HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE 1 + +/* struct drm_connector_state->self_refresh_aware is available */ +#define HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE 1 + +/* struct drm_crtc_funcs->gamma_set is optional */ +#define HAVE_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL 1 + +/* struct drm_crtc_funcs->get_vblank_timestamp() is available */ +#define HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP 1 + +/* struct drm_crtc_state->async_flip is available */ +#define HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP 1 + +/* drm_gem_open_object is defined in struct drm_drv */ +/* #undef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK */ + +/* drm_plane_helper_funcs->atomic_check() second param wants drm_atomic_state + arg */ +#define HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS 1 + +/* idr->idr_base is available */ +#define HAVE_STRUCT_IDE_IDR_BASE 1 + +/* struct smca_bank is available */ +/* #undef HAVE_STRUCT_SMCA_BANK */ + +/* struct xarray is available */ +#define HAVE_STRUCT_XARRAY 1 + +/* zone->managed_pages is available */ +/* #undef HAVE_STRUCT_ZONE_MANAGED_PAGES */ + +/* str_yes_no() is defined */ +#define HAVE_STR_YES_NO 1 + +/* synchronize_shrinkers() is available */ +/* #undef HAVE_SYNCHRONIZE_SHRINKERS */ + +/* sysfs_emit() and sysfs_emit_at() are available */ +#define HAVE_SYSFS_EMIT 1 + +/* topology_num_cores_per_package is available */ +#define HAVE_TOPOLOGY_NUM_CORES_PER_PACKAGE 1 + +/* totalram_pages() is available */ +#define HAVE_TOTALRAM_PAGES 1 + +/* interval_tree_insert() takes struct rb_root_cached */ +#define HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED 1 + +/* __poll_t is available */ +#define HAVE_TYPE__POLL_T 1 + +/* vga_client_register() does not pass a cookie */ +#define HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE 1 + +/* vga_remove_vgacon() is available */ +#define HAVE_VGA_REMOVE_VGACON 1 + +/* vma_is_initial_{heap, stack} is available */ +#define HAVE_VMA_IS_INITIAL_HEAP 1 + +/* vma_lookup() is available */ +#define HAVE_VMA_LOOKUP 1 + +/* vmf_insert_*() are available */ +#define HAVE_VMF_INSERT 1 + +/* vmf_insert_mixed_prot() is available */ +/* #undef HAVE_VMF_INSERT_MIXED_PROT */ + +/* vmf_insert_pfn_prot() is available */ +#define HAVE_VMF_INSERT_PFN_PROT 1 + +/* vm_fault->{address/vma} is available */ +#define HAVE_VM_FAULT_ADDRESS_VMA 1 + +/* vm_flags_{set, clear} is available */ +#define HAVE_VM_FLAGS_SET 1 + +/* vm_insert_pfn_prot() is available */ +/* #undef HAVE_VM_INSERT_PFN_PROT */ + +/* vm_operations_struct->fault() wants 1 arg */ +#define HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG 1 + +/* wait_queue_entry_t exists */ +#define HAVE_WAIT_QUEUE_ENTRY 1 + +/* want_init_on_free() is available */ +#define HAVE_WANT_INIT_ON_FREE 1 + +/* ww_mutex_trylock() has context arg */ +#define HAVE_WW_MUTEX_TRYLOCK_CONTEXT_ARG 1 + +/* enum x86_hypervisor_type is available */ +#define HAVE_X86_HYPERVISOR_TYPE 1 + +/* zone_device_page_init() is available */ +#define HAVE_ZONE_DEVICE_PAGE_INIT 1 + +/* zone_managed_pages() is available */ +#define HAVE_ZONE_MANAGED_PAGES 1 + +/* __dma_fence_is_later() is available and has 2 args */ +/* #undef HAVE__DMA_FENCE_IS_LATER_2ARGS */ + +/* __dma_fence_is_later() is
available and has ops arg */ +#define HAVE__DMA_FENCE_IS_LATER_WITH_OPS_ARG 1 + +/* __drm_atomic_helper_crtc_reset() is available */ +#define HAVE___DRM_ATOMIC_HELPER_CRTC_RESET 1 + +/* __kthread_should_park() is available */ +/* #undef HAVE___KTHREAD_SHOULD_PARK */ + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "amdgpu-dkms" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "amdgpu-dkms 6.10.0" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "amdgpu-dkms" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "6.10.0" + +#include "config-amd-chips.h" + +#define AMDGPU_VERSION PACKAGE_VERSION diff --git a/drivers/gpu/drm/amd/dkms/config/install-sh b/drivers/gpu/drm/amd/dkms/config/install-sh new file mode 100755 index 0000000000000..59990a1049267 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/config/install-sh @@ -0,0 +1,508 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2014-09-12.12; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. 
+ +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." 
>&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dst=$dstdir/`basename "$src"` + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # $RANDOM is not portable (e.g. dash); use it when possible to + # lower collision chance + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # As "mkdir -p" follows symlinks and we work in /tmp possibly; so + # create the $tmpdir first (and fail if unsuccessful) to make sure + # that nobody tries to guess the $tmpdir name. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. + if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. 
+ $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/drivers/gpu/drm/amd/dkms/configure.ac b/drivers/gpu/drm/amd/dkms/configure.ac new file mode 100644 index 0000000000000..ec686045c8995 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/configure.ac @@ -0,0 +1,12 @@ +AC_INIT(amdgpu-dkms, 6.10.5) + +AC_LANG(C) +AC_CONFIG_AUX_DIR([config]) +AC_CONFIG_HEADERS([config/config.h]) +AC_PROG_INSTALL +AC_PROG_CC +AC_CONFIG_MACRO_DIR([m4]) + +AC_AMDGPU_CONFIG + +AC_OUTPUT diff --git a/drivers/gpu/drm/amd/dkms/dkms.conf b/drivers/gpu/drm/amd/dkms/dkms.conf new file mode 100644 index 0000000000000..ceef7d15a7c26 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/dkms.conf @@ -0,0 +1,54 @@ +PACKAGE_NAME="amdgpu" +PACKAGE_VERSION="1.0" +AUTOINSTALL="yes" +module_build_dir="$(mktemp -ut amd.XXXXXXXX)" +PRE_BUILD="amd/dkms/pre-build.sh $kernelver $dkms_tree $module $module_version $module_build_dir" +POST_BUILD="amd/dkms/post-build.sh $module_build_dir" +# not all OSes support weak module updates +NO_WEAK_MODULES="yes" + +# does not work with RHEL DKMS +#MODULES_CONF[0]="blacklist radeon" + +BUILT_MODULE_NAME[0]="amdgpu" +BUILT_MODULE_LOCATION[0]="amd/amdgpu" +DEST_MODULE_LOCATION[0]="/kernel/drivers/gpu/drm/amd/amdgpu" + +BUILT_MODULE_NAME[1]="amdttm" +BUILT_MODULE_LOCATION[1]="ttm" +DEST_MODULE_LOCATION[1]="/kernel/drivers/gpu/drm/ttm" + +BUILT_MODULE_NAME[2]="amdkcl" +BUILT_MODULE_LOCATION[2]="amd/amdkcl" +DEST_MODULE_LOCATION[2]="/kernel/drivers/gpu/drm/amd/amdkcl" + +BUILT_MODULE_NAME[3]="amd-sched" +BUILT_MODULE_LOCATION[3]="scheduler" +DEST_MODULE_LOCATION[3]="/kernel/drivers/gpu/drm/scheduler" + +BUILT_MODULE_NAME[4]="amddrm_ttm_helper" +BUILT_MODULE_LOCATION[4]="." +DEST_MODULE_LOCATION[4]="/kernel/drivers/gpu/drm" + +BUILT_MODULE_NAME[5]="amddrm_buddy" +BUILT_MODULE_LOCATION[5]="." +DEST_MODULE_LOCATION[5]="/kernel/drivers/gpu/drm" + +BUILT_MODULE_NAME[6]="amdxcp" +BUILT_MODULE_LOCATION[6]="amd/amdxcp" +DEST_MODULE_LOCATION[6]="/kernel/drivers/gpu/drm/amd/amdxcp" + +num_cpu_cores() +{ + if [ -x /usr/bin/nproc ]; then + nproc + else + echo "1" + fi +} + +MAKE[0]=".
$module_build_dir/.env && make -j$(num_cpu_cores) KERNELRELEASE=$kernelver \ + TTM_NAME=${BUILT_MODULE_NAME[1]} \ + SCHED_NAME=${BUILT_MODULE_NAME[3]} \ + -C $kernel_source_dir \ + M=$module_build_dir" diff --git a/drivers/gpu/drm/amd/dkms/docs/examples/wattman-example-script b/drivers/gpu/drm/amd/dkms/docs/examples/wattman-example-script new file mode 100644 index 0000000000000..90b14faec6dbf --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/docs/examples/wattman-example-script @@ -0,0 +1,74 @@ +## wattman-like functionality +# boot with amdgpu.ppfeaturemask=0xffffffff (make sure the PP_OVERDRIVE_MASK bit is set, see hwmgr.h) +# see the current dpm clock and voltage levels +cat /sys/class/drm/card0/device/pp_od_clk_voltage +#OD_SCLK: +#0: 300Mhz 900 mV +#1: 484Mhz 925 mV +#2: 709Mhz 962 mV +#3: 858Mhz 1112 mV +#4: 891Mhz 1150 mV +#5: 917Mhz 1175 mV +#6: 949Mhz 1175 mV +#7: 973Mhz 1175 mV +#OD_MCLK: +#0: 150Mhz 900 mV +#1: 1375Mhz 975 mV +# change mclk dpm level 0 from 150 to 155Mhz, no change to voltage +# format is "m dpm_level clock_in_mhz voltage_in_mv" +echo "m 0 155 900" > /sys/class/drm/card0/device/pp_od_clk_voltage +# change sclk dpm level 7 from 973 to 975Mhz, change voltage from 1175 to 1180 mV +# format is "s dpm_level clock_in_mhz voltage_in_mv" +echo "s 7 975 1180" > /sys/class/drm/card0/device/pp_od_clk_voltage +# change sclk dpm level 5 from 917 to 910Mhz, change voltage from 1175 to 1160 mV +# format is "s dpm_level clock_in_mhz voltage_in_mv" +echo "s 5 910 1160" > /sys/class/drm/card0/device/pp_od_clk_voltage +# see the current dpm clock and voltage levels +cat /sys/class/drm/card0/device/pp_od_clk_voltage +#OD_SCLK: +#0: 300Mhz 900 mV +#1: 484Mhz 925 mV +#2: 709Mhz 962 mV +#3: 858Mhz 1112 mV +#4: 891Mhz 1150 mV +#5: 910Mhz 1160 mV +#6: 949Mhz 1175 mV +#7: 975Mhz 1180 mV +#OD_MCLK: +#0: 155Mhz 900 mV +#1: 1375Mhz 975 mV +# commit the changes to the hw +echo "c" > /sys/class/drm/card0/device/pp_od_clk_voltage +# reset to the default dpm states +echo "r" > /sys/class/drm/card0/device/pp_od_clk_voltage +# commit the reset state to the hw +echo "c" > /sys/class/drm/card0/device/pp_od_clk_voltage + +## reading/adjusting hwmon values +# https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface +# see which hwmon device this is +cat /sys/class/hwmon/hwmon0/name +# readback current vddgfx/vddnb voltages +# see which one this is +cat /sys/class/hwmon/hwmon0/in0_label +# read the voltage (mV) +cat /sys/class/hwmon/hwmon0/in0_input +# see current power (microwatts) +cat /sys/class/hwmon/hwmon0/power1_average +# current temp (millidegrees C) +cat /sys/class/hwmon/hwmon0/temp1_input +# see fan speed (rpm) +cat /sys/class/hwmon/hwmon0/fan1_input +# see fan speed pwm (0-255) +cat /sys/class/hwmon/hwmon0/pwm1 +# see min/max pwm limits +cat /sys/class/hwmon/hwmon0/pwm1_min +cat /sys/class/hwmon/hwmon0/pwm1_max +# see current fan control mode (0 none, 1 manual fan control, 2 dynamic fan control) +cat /sys/class/hwmon/hwmon0/pwm1_enable +# enable manual fan control +echo 1 > /sys/class/hwmon/hwmon0/pwm1_enable +# manually set the fan speed (100/255 = 39%) +echo 100 > /sys/class/hwmon/hwmon0/pwm1 +# enable automatic fan control +echo 2 > /sys/class/hwmon/hwmon0/pwm1_enable
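A minimal C equivalent of the overdrive echoes in the example above (a hedged sketch: the card0 sysfs path is assumed as in the script, root privileges are required, and the od_write helper name is illustrative):

#include <stdio.h>

static int od_write(const char *cmd)
{
    const char *path = "/sys/class/drm/card0/device/pp_od_clk_voltage";
    FILE *f = fopen(path, "w");

    if (!f) {
        perror("fopen");
        return -1;
    }
    fprintf(f, "%s\n", cmd);    /* one command per open, like echo does */
    return fclose(f);           /* sysfs may report the error on close */
}

int main(void)
{
    if (od_write("s 5 910 1160"))   /* adjust sclk level 5 */
        return 1;
    return od_write("c") ? 1 : 0;   /* commit the change to hardware */
}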
diff --git a/drivers/gpu/drm/amd/dkms/m4/__drm_atomic_helper_crtc_reset.m4 b/drivers/gpu/drm/amd/dkms/m4/__drm_atomic_helper_crtc_reset.m4 new file mode 100644 index 0000000000000..532031624b3a1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/__drm_atomic_helper_crtc_reset.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.1-rc2-1163-g7d26097b4beb +dnl # drm/atomic: Create __drm_atomic_helper_crtc_reset() for subclassing crtc_state. +dnl # +AC_DEFUN([AC_AMDGPU___DRM_ATOMIC_HELPER_CRTC_RESET], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <drm/drm_atomic_state_helper.h> + ],[ + __drm_atomic_helper_crtc_reset(NULL, NULL); + ],[__drm_atomic_helper_crtc_reset], [drivers/gpu/drm/drm_atomic_state_helper.c],[ + AC_DEFINE(HAVE___DRM_ATOMIC_HELPER_CRTC_RESET, 1, + [__drm_atomic_helper_crtc_reset() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/__kthread-should-park.m4 b/drivers/gpu/drm/amd/dkms/m4/__kthread-should-park.m4 new file mode 100644 index 0000000000000..e4b111dff02e1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/__kthread-should-park.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # introduced commit 0121805d9d2b1fff371e195c28e9b86ae38b5e47 +dnl # kthread: Add __kthread_should_park() +dnl # +AC_DEFUN([AC_AMDGPU___KTHREAD_SHOULD_PARK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/kthread.h> + ],[ + __kthread_should_park(NULL); + ],[__kthread_should_park],[kernel/kthread.c],[ + AC_DEFINE(HAVE___KTHREAD_SHOULD_PARK, 1, + [__kthread_should_park() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/access-ok.m4 b/drivers/gpu/drm/amd/dkms/m4/access-ok.m4 new file mode 100644 index 0000000000000..70a1ec664ef7f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/access-ok.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit 96d4f267e40f9509e8a66e2b39e8b95655617693 +dnl # Author: Linus Torvalds +dnl # Date: Thu Jan 3 18:57:57 2019 -0800 +dnl # Remove 'type' argument from access_ok() function +dnl # +AC_DEFUN([AC_AMDGPU_ACCESS_OK_WITH_TWO_ARGUMENTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/uaccess.h> + ],[ + access_ok(0, 0); + ],[ + AC_DEFINE(HAVE_ACCESS_OK_WITH_TWO_ARGUMENTS, 1, + [whether access_ok(x, x) is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/acpi-put-table.m4 b/drivers/gpu/drm/amd/dkms/m4/acpi-put-table.m4 new file mode 100644 index 0000000000000..83a0b1e027b9b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/acpi-put-table.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit: v4.9-rc5-17-g174cc7187e6f +dnl # ACPICA: Tables: Back port acpi_get_table_with_size() and +dnl # early_acpi_os_unmap_memory() from Linux kernel +AC_DEFUN([AC_AMDGPU_ACPI_PUT_TABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/acpi.h> + ], [ + acpi_put_table(NULL); + ], [acpi_put_table], [drivers/acpi/acpica/tbxface.c], [ + AC_DEFINE(HAVE_ACPI_PUT_TABLE, 1, + [acpi_put_table() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/acpi-video-funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/acpi-video-funcs.m4 new file mode 100644 index 0000000000000..d2a957a3c28a7 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/acpi-video-funcs.m4 @@ -0,0 +1,41 @@ +dnl # +dnl # commit: v6.1-rc1-17-da11ef832972 +dnl # drm/amdgpu: Don't register backlight when another +dnl # backlight should be used (v3) + +AC_DEFUN([AC_AMDGPU_ACPI_VIDEO_BACKLIGHT_USE_NATIVE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <acpi/video.h> + ], [ + acpi_video_backlight_use_native(); + ], [ + AC_DEFINE(HAVE_ACPI_VIDEO_BACKLIGHT_USE_NATIVE, 1, + [acpi_video_backlight_use_native() is available]) + ]) + ]) +]) + +dnl # +dnl # commit: v6.1-rc1-161-c0f50c5de93b +dnl # drm/amdgpu: Register ACPI video backlight when +dnl # skipping amdgpu backlight registration + +AC_DEFUN([AC_AMDGPU_ACPI_VIDEO_REGISTER_BACKLIGHT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <acpi/video.h> + ], [ +
acpi_video_register_backlight(); + ], [ + AC_DEFINE(HAVE_ACPI_VIDEO_REGISTER_BACKLIGHT, 1, + [acpi_video_register_backlight() is available]) + ]) + ]) +]) + + +AC_DEFUN([AC_AMDGPU_ACPI_VIDEO_FUNCS], [ + AC_AMDGPU_ACPI_VIDEO_BACKLIGHT_USE_NATIVE + AC_AMDGPU_ACPI_VIDEO_REGISTER_BACKLIGHT +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/acpi_dev_get_first_match_dev.m4 b/drivers/gpu/drm/amd/dkms/m4/acpi_dev_get_first_match_dev.m4 new file mode 100644 index 0000000000000..5668a2d728b89 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/acpi_dev_get_first_match_dev.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit: v5.1-rc3-1-g817b4d64da03 +dnl # ACPI / utils: Introduce acpi_dev_get_first_match_dev() helper +dnl # +AC_DEFUN([AC_AMDGPU_ACPI_DEV_GET_FIRST_MATCH_DEV], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/acpi.h> + ],[ + acpi_dev_get_first_match_dev(NULL, NULL, 0); + ],[acpi_dev_get_first_match_dev],[drivers/acpi/utils.c], [ + AC_DEFINE(HAVE_ACPI_DEV_GET_FIRST_MATCH_DEV, 1, + [acpi_dev_get_first_match_dev() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/acpi_srat.m4 b/drivers/gpu/drm/amd/dkms/m4/acpi_srat.m4 new file mode 100644 index 0000000000000..16493b5e8d995 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/acpi_srat.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit aa475a59fff172ec858093fbc8471c0993081481 +dnl # ACPICA: ACPI 6.3: SRAT: add Generic Affinity Structure subtable +dnl # +AC_DEFUN([AC_AMDGPU_ACPI_SRAT_GENERIC_AFFINITY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/acpi.h> + ], [ + struct acpi_srat_generic_affinity *p = NULL; + p->reserved = 0; + ], [ + AC_DEFINE(HAVE_ACPI_SRAT_GENERIC_AFFINITY, 1, [struct acpi_srat_generic_affinity is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/amd-iommu-pc-supported.m4 b/drivers/gpu/drm/amd/dkms/m4/amd-iommu-pc-supported.m4 new file mode 100644 index 0000000000000..c42fcb583b362 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/amd-iommu-pc-supported.m4 @@ -0,0 +1,44 @@ +dnl # +dnl # v5.12-rc3-5-gfc1b6620501f iommu/amd: Move a few prototypes to include/linux/amd-iommu.h +dnl # v5.12-rc3-4-gb29a1fc7595a iommu/amd: Remove a few unused exports +dnl # v4.11-rc4-171-gf5863a00e73c x86/events/amd/iommu.c: Modify functions to query max banks and counters +dnl # +AC_DEFUN([AC_AMDGPU_AMD_IOMMU_PC_GET_MAX_BANKS], [ + AC_KERNEL_TRY_COMPILE([ + #include <linux/amd-iommu.h> + ], [ + amd_iommu_pc_get_max_banks(0); + ], [ + AC_DEFINE(HAVE_AMD_IOMMU_PC_GET_MAX_BANKS_DECLARED, 1, + [amd_iommu_pc_get_max_banks() declared]) + ], [ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/amd-iommu.h> + ],[ + get_amd_iommu(0); + ],[get_amd_iommu],[drivers/iommu/amd/init.c], [ + AC_DEFINE(HAVE_AMD_IOMMU_PC_GET_MAX_BANKS_UINT, 1, + [amd_iommu_pc_get_max_banks() arg is unsigned int]) + ]) + ]) +]) + +dnl # +dnl # commit v3.10-rc3-89-g30861ddc9cca +dnl # perf/x86/amd: Add IOMMU Performance Counter resource management +dnl # +AC_DEFUN([AC_AMDGPU_AMD_IOMMU_PC_SUPPORTED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/amd-iommu.h> + ], [ + #ifndef CONFIG_AMD_IOMMU + #error CONFIG_AMD_IOMMU not enabled + #endif + ], [amd_iommu_pc_supported], [drivers/iommu/amd/init.c], [ + AC_DEFINE(HAVE_AMD_IOMMU_PC_SUPPORTED, 1, + [amd_iommu_pc_supported() is available]) + AC_AMDGPU_AMD_IOMMU_PC_GET_MAX_BANKS + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/amd_iommu_invalidate_ctx.m4 b/drivers/gpu/drm/amd/dkms/m4/amd_iommu_invalidate_ctx.m4 new file mode 100644 index 0000000000000..3ddbe27c03284 --- /dev/null +++
b/drivers/gpu/drm/amd/dkms/m4/amd_iommu_invalidate_ctx.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit c7b6bac9c72c5fcbd6e9e12545bd3022c7f21860 +dnl # drm, iommu: Change type of pasid to u32 +dnl # +AC_DEFUN([AC_AMDGPU_AMD_IOMMU_INVALIDATE_CTX], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/amd-iommu.h> + ], [ + void (*f)(struct pci_dev *pdev, u32 pasid) = NULL; + amd_iommu_invalidate_ctx callback; + + callback = f; + ], [ + AC_DEFINE(HAVE_AMD_IOMMU_INVALIDATE_CTX_PASID_U32, 1, + [amd_iommu_invalidate_ctx take arg type of pasid as u32]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/apple_gmux_detect.m4 b/drivers/gpu/drm/amd/dkms/m4/apple_gmux_detect.m4 new file mode 100644 index 0000000000000..defc80545265e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/apple_gmux_detect.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v6.1-2256-gbd100f492c7e +dnl # platform/x86: apple-gmux: Add apple_gmux_detect() helper +dnl # +AC_DEFUN([AC_AMDGPU_APPLE_GMUX_DETECT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/apple-gmux.h> + ],[ + apple_gmux_detect(NULL, NULL); + ],[ + AC_DEFINE(HAVE_APPLE_GMUX_DETECT, 1, + [apple_gmux_detect() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/arch-io-reserve-free-memtype-wc.m4 b/drivers/gpu/drm/amd/dkms/m4/arch-io-reserve-free-memtype-wc.m4 new file mode 100644 index 0000000000000..8245a8d52ee43 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/arch-io-reserve-free-memtype-wc.m4 @@ -0,0 +1,34 @@ +dnl # +dnl # commit v4.9-rc2-1-g8ef4227615e1 +dnl # x86/io: add interface to reserve io memtype for a resource range. (v1.1) +dnl # +AC_DEFUN([AC_AMDGPU_ARCH_IO_RESERVE_FREE_MEMTYPE_WC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/io.h> + ], [ + arch_io_reserve_memtype_wc(0, 0); + arch_io_free_memtype_wc(0, 0); + ], [arch_io_reserve_memtype_wc arch_io_free_memtype_wc], [arch/x86/mm/pat/memtype.c arch/x86/mm/pat.c], [ + AC_DEFINE(HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC, 1, + [arch_io_{reserve/free}_memtype_wc() are available]) + ], [ + AC_KERNEL_TRY_COMPILE([ + #ifdef HAVE_DRM_DRM_BACKPORT_H + #include <drm/drm_backport.h> + #endif + #include <linux/io.h> + ], [ + #ifdef CONFIG_X86 + #error stub arch_io_* functions found + #endif + + arch_io_reserve_memtype_wc(0, 0); + arch_io_free_memtype_wc(0, 0); + ], [ + AC_DEFINE(HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC, 1, + [arch_io_{reserve/free}_memtype_wc() are available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/asm_topology.m4 b/drivers/gpu/drm/amd/dkms/m4/asm_topology.m4 new file mode 100644 index 0000000000000..ec603c4bd1a9b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/asm_topology.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # v6.8-rc4-70-gfd43b8ae76e9 +dnl # x86/cpu/topology: Provide __num_[cores|threads]_per_package +dnl # +AC_DEFUN([AC_AMDGPU_TOPOLOGY_NUM_CORES_PER_PACKAGE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/topology.h> + #include <asm/topology.h> + ], [ + int a = 0; + a = topology_num_cores_per_package(); + ], [ + AC_DEFINE(HAVE_TOPOLOGY_NUM_CORES_PER_PACKAGE, 1, + [topology_num_cores_per_package is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/assign_str.m4 b/drivers/gpu/drm/amd/dkms/m4/assign_str.m4 new file mode 100644 index 0000000000000..477d281b98373 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/assign_str.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v6.9-11925-g2c92ca849fcc +dnl # tracing/treewide: Remove second parameter of __assign_str() +dnl # The trace system cannot be compile-tested directly, so use a script to handle it specially +dnl # +AC_DEFUN([AC_AMDGPU_ASSIGN_STR], [ +
AC_KERNEL_DO_BACKGROUND([ + header_file=stage6_event_callback.h + header_file_src=$LINUX/include/trace/stages/$header_file + AS_IF([test -f "$header_file_src"], [ + AS_IF([grep -q '^#define __assign_str(dst)' $header_file_src], [ + AC_DEFINE(HAVE_ASSIGN_STR_ONE_ARGUMENT, 1, + [__assign_str() wants 1 argument]) + ]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/atomic-long-try-cmpxchg.m4 b/drivers/gpu/drm/amd/dkms/m4/atomic-long-try-cmpxchg.m4 new file mode 100644 index 0000000000000..5d20db398bf97 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/atomic-long-try-cmpxchg.m4 @@ -0,0 +1,37 @@ +dnl # +dnl # v5.13-rc1-138-g67d1b0de258a locking/atomic: add arch_atomic_long*() +dnl # +AC_DEFUN([AC_AMDGPU_LINUX_ATOMIC_LONG_TRY_CMPXCHG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/atomic.h> + ], [ + bool t; + long r = 0; + t = atomic_long_try_cmpxchg(NULL, NULL, r); + ], [ + AC_DEFINE(HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG, 1, + [atomic_long_try_cmpxchg() is available]) + ]) + ]) +]) + +dnl # +dnl # v6.3-rc1-6-g8fc4fddaf9a1 +dnl # locking/generic: Wire up local{,64}_try_cmpxchg() +dnl # +AC_DEFUN([AC_AMDGPU_LINUX_LOCAL_TRY_CMPXCHG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <asm/local.h> + ], [ + bool t; + s64 r = 0; + local_t *l = NULL; + t = local_try_cmpxchg(l, NULL, r); + ], [ + AC_DEFINE(HAVE_LINUX_LOCAL_TRY_CMPXCHG, 1, + [local_try_cmpxchg() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/backlight-device-set-brightness.m4 b/drivers/gpu/drm/amd/dkms/m4/backlight-device-set-brightness.m4 new file mode 100644 index 0000000000000..b021cbc2ab976 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/backlight-device-set-brightness.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v4.6-rc6-1-gf6a4790a5471 +dnl # video / backlight: add two APIs for drivers to use +dnl # +AC_DEFUN([AC_AMDGPU_BACKLIGHT_DEVICE_SET_BRIGHTNESS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/backlight.h> + ], [ + backlight_device_set_brightness(NULL, 0); + ], [backlight_device_set_brightness], [drivers/video/backlight/backlight.c], [ + AC_DEFINE(HAVE_BACKLIGHT_DEVICE_SET_BRIGHTNESS, 1, + [backlight_device_set_brightness() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/bitmap_to_arr32.m4 b/drivers/gpu/drm/amd/dkms/m4/bitmap_to_arr32.m4 new file mode 100644 index 0000000000000..3c981e3fa9518 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/bitmap_to_arr32.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.19-rc1-22-525d6515604e +dnl # drm/amd/pm: use bitmap_{from,to}_arr32 where appropriate +dnl # +AC_DEFUN([AC_AMDGPU_BITMAP_TO_ARR32], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/bitmap.h> + ],[ + bitmap_to_arr32(NULL, NULL, 0); + ],[ + AC_DEFINE(HAVE_BITMAP_TO_ARR32, 1, + [bitmap_to_arr32() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/cancel_work.m4 b/drivers/gpu/drm/amd/dkms/m4/cancel_work.m4 new file mode 100644 index 0000000000000..a9167fe9d15c0 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/cancel_work.m4 @@ -0,0 +1,39 @@ +dnl # +dnl # commit id:c46fd358070f22ba68d6e74c22016a33b914c20a +dnl # PCI/ASPM: Enable Latency Tolerance Reporting when supported +dnl # +dnl # +AC_DEFUN([AC_AMDGPU_CANCEL_WORK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/workqueue.h> + ], [ + cancel_work(NULL); + ], [cancel_work], [kernel/workqueue.c], [ + AC_DEFINE(HAVE_CANCEL_WORK, 1, + [cancel_work() is available]) + ]) + ]) +]) + +dnl # +dnl # commit id:v5.0-rc2-28-g8204e0c1113d +dnl # workqueue: Provide
queue_work_node to queue work near a given NUMA node +dnl # +AC_DEFUN([AC_AMDGPU_QUEUE_WORK_NODE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/workqueue.h> + ], [ + queue_work_node(0, NULL, NULL); + ], [queue_work_node], [kernel/workqueue.c], [ + AC_DEFINE(HAVE_QUEUE_WORK_NODE, 1, + [queue_work_node() is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_WORKQUEUE], [ + AC_AMDGPU_CANCEL_WORK + AC_AMDGPU_QUEUE_WORK_NODE +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/chunk-id-scheduled-dependencies.m4 b/drivers/gpu/drm/amd/dkms/m4/chunk-id-scheduled-dependencies.m4 new file mode 100644 index 0000000000000..c1075f2a16d7f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/chunk-id-scheduled-dependencies.m4 @@ -0,0 +1,16 @@ +dnl # commit 67dd1a36334ffce82bebeb2d633e152aa436d370 +dnl # drm/amdgpu: Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES +AC_DEFUN([AC_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <drm/amdgpu_drm.h> + ], [ + #ifndef AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES + #error AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES not #defined + #endif + ], [ + AC_DEFINE(HAVE_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES, 1, + [whether AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES is defined]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/chunk-id-syncobj-timeline-wait-signal.m4 b/drivers/gpu/drm/amd/dkms/m4/chunk-id-syncobj-timeline-wait-signal.m4 new file mode 100644 index 0000000000000..1ae2aee78530f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/chunk-id-syncobj-timeline-wait-signal.m4 @@ -0,0 +1,17 @@ +dnl # commit 2624dd154bcc53ac2de16ecae9746ba867b6ca70 +dnl # drm/amdgpu: add timeline support in amdgpu CS v3 +AC_DEFUN([AC_AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <drm/amdgpu_drm.h> + ], [ + #if !defined(AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT) ||\ + !defined(AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL) + #error CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL not #defined + #endif + ], [ + AC_DEFINE(HAVE_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL, 1, + [whether CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL is defined]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/compat_ptr_ioctl.m4 b/drivers/gpu/drm/amd/dkms/m4/compat_ptr_ioctl.m4 new file mode 100644 index 0000000000000..f9c4c12aa4b75 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/compat_ptr_ioctl.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.4-rc2-1-g2952db0fd51b +dnl # compat_ioctl: add compat_ptr_ioctl() +dnl # +AC_DEFUN([AC_AMDGPU_COMPAT_PTR_IOCTL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/fs.h> + ],[ + compat_ptr_ioctl(NULL, 0, 0); + ],[compat_ptr_ioctl],[fs/ioctl.c],[ + AC_DEFINE(HAVE_COMPAT_PTR_IOCTL, + 1, + [compat_ptr_ioctl() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/config.m4 b/drivers/gpu/drm/amd/dkms/m4/config.m4 new file mode 100644 index 0000000000000..d66b31b9b3536 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/config.m4 @@ -0,0 +1,9 @@ +AC_DEFUN([AC_AMDGPU_CONFIG], [ + AC_ARG_ENABLE([linux-builtin], + [AS_HELP_STRING([--enable-linux-builtin], + [Configure for builtin kernel modules @<:@default=no@:>@])], + [], + [enable_linux_builtin=no]) + + AC_CONFIG_KERNEL +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/create_class.m4 b/drivers/gpu/drm/amd/dkms/m4/create_class.m4 new file mode 100644 index 0000000000000..bb9bd7bd2d13d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/create_class.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v6.3-rc1-13-g1aaba11da9aa driver core: class: remove module * from class_create() +dnl #
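+dnl # A hedged sketch of how the detected result is consumed (all names
+dnl # below are illustrative, not taken from this tree):
+dnl #   struct class *cls;
+dnl #   #ifdef HAVE_ONE_ARGUMENT_OF_CLASS_CREATE
+dnl #   cls = class_create("mydev");              /* module arg dropped */
+dnl #   #else
+dnl #   cls = class_create(THIS_MODULE, "mydev");
+dnl #   #endif
+dnl #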
+AC_DEFUN([AC_AMDGPU_LINUX_DEVICE_CLASS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/device.h> + ], [ + struct class* class = NULL; + class = class_create(NULL); + ], [ + AC_DEFINE(HAVE_ONE_ARGUMENT_OF_CLASS_CREATE, 1, + [class_create() has one argument]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/debugfs_inode.m4 b/drivers/gpu/drm/amd/dkms/m4/debugfs_inode.m4 new file mode 100644 index 0000000000000..29b066233e0d4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/debugfs_inode.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit: v3.19-rc5-12-ge59b4e9187bd +dnl # debugfs: Provide a file creation function +dnl # that also takes an initial size +AC_DEFUN([AC_AMDGPU_DEBUGFS_CREATE_FILE_SIZE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/debugfs.h> + ],[ + debugfs_create_file_size(NULL, 0, NULL, NULL, NULL, 0); + ],[debugfs_create_file_size], [fs/debugfs/inode.c], [ + AC_DEFINE(HAVE_DEBUGFS_CREATE_FILE_SIZE, 1, + [debugfs_create_file_size() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dev-pagemap.m4 b/drivers/gpu/drm/amd/dkms/m4/dev-pagemap.m4 new file mode 100644 index 0000000000000..b33ceb61643ce --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dev-pagemap.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # commit a4574f63edc6f76fb46dcd65d3eb4d5a8e23ba38 +dnl # mm/memremap_pages: convert to 'struct range' +dnl # +AC_DEFUN([AC_AMDGPU_DEV_PAGEMAP_RANGE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/memremap.h> + ], [ + struct dev_pagemap *pm = NULL; + pm->range.start = 0; + ], [ + AC_DEFINE(HAVE_DEV_PAGEMAP_RANGE, 1, + [there is 'range' field within dev_pagemap structure]) + ]) + ]) +]) +AC_DEFUN([AC_AMDGPU_DEV_PAGEMAP], [ + AC_AMDGPU_DEV_PAGEMAP_RANGE +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/dev-pm-set-driver-flags.m4 b/drivers/gpu/drm/amd/dkms/m4/dev-pm-set-driver-flags.m4 new file mode 100644 index 0000000000000..d1fba526e26d1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dev-pm-set-driver-flags.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v4.14-rc4-21-g08810a4119aa +dnl # Author: Rafael J.
Wysocki +dnl # Date: Wed Oct 25 14:12:29 2017 +0200 +dnl # PM / core: Add NEVER_SKIP and SMART_PREPARE driver flags +dnl # +AC_DEFUN([AC_AMDGPU_DEV_PM_SET_DRIVER_FLAGS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/device.h> + ], [ + dev_pm_set_driver_flags(NULL, 1); + ], [ + AC_DEFINE(HAVE_DEV_PM_SET_DRIVER_FLAGS, 1, + [dev_pm_set_driver_flags() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dev_is_removable.m4 b/drivers/gpu/drm/amd/dkms/m4/dev_is_removable.m4 new file mode 100644 index 0000000000000..14ddb4989bbac --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dev_is_removable.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.13-rc2-70-g70f400d4d957 +dnl # driver core: Move the "removable" attribute from USB to core +dnl # +AC_DEFUN([AC_AMDGPU_DEV_IS_REMOVABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/device.h> + ], [ + bool res = 0; + res = dev_is_removable(NULL); + ], [ + AC_DEFINE(HAVE_DEV_IS_REMOVABLE, 1, + [dev_is_removable() is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/devcgroup-check-permission.m4 b/drivers/gpu/drm/amd/dkms/m4/devcgroup-check-permission.m4 new file mode 100644 index 0000000000000..0341249c5457b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/devcgroup-check-permission.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # commit v5.3-rc3-2427-g4b7d4d453fc4 +dnl # device_cgroup: Export devcgroup_check_permission +dnl # +AC_DEFUN([AC_AMDGPU_DEVCGROUP_CHECK_PERMISSION], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/device_cgroup.h> + ], [ + devcgroup_check_permission(0, 0, 0, 0); + ], [devcgroup_check_permission], [security/device_cgroup.c], [ + AC_DEFINE(HAVE_DEVCGROUP_CHECK_PERMISSION, 1, [devcgroup_check_permission() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-buf.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-buf.m4 new file mode 100644 index 0000000000000..86159c3f96200 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-buf.m4 @@ -0,0 +1,59 @@ +dnl # +dnl # v5.6-rc5-1663-g09606b5446c2 +dnl # dma-buf: add peer2peer flag +dnl # +AC_DEFUN([AC_AMDGPU_DMA_BUF], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/dma-buf.h> + ],[ + struct dma_buf_attach_ops *ptr = NULL; + ptr->allow_peer2peer = false; + ],[ + AC_DEFINE(HAVE_STRUCT_DMA_BUF_ATTACH_OPS_ALLOW_PEER2PEER, + 1, + [struct dma_buf_attach_ops->allow_peer2peer is available]) + + AC_DEFINE(HAVE_STRUCT_DMA_BUF_OPS_PIN, + 1, + [struct dma_buf_ops->pin() is available]) + ],[ + dnl # + dnl # 4981cdb063e3 dma-buf: make move_notify mandatory if importer_ops are provided + dnl # bd2275eeed5b dma-buf: drop dynamic_mapping flag + dnl # a448cb003edc drm/amdgpu: implement amdgpu_gem_prime_move_notify v2 + dnl # 2d4dad2734e2 drm/amdgpu: add amdgpu_dma_buf_pin/unpin v2 + dnl # 4993ba02635f drm/amdgpu: use allowed_domains for exported DMA-bufs + dnl # d2588d2ded0f drm/ttm: remove the backing store if no placement is given + dnl # bb42df4662a4 dma-buf: add dynamic DMA-buf handling v15 + dnl # + AC_KERNEL_TRY_COMPILE([ + #include <linux/dma-buf.h> + ],[ + struct dma_buf_ops *ptr = NULL; + ptr->pin(NULL); + ],[ + AC_DEFINE(HAVE_STRUCT_DMA_BUF_OPS_PIN, + 1, + [struct dma_buf_ops->pin() is available]) + ], [ + dnl # + dnl # commit v5.4-rc4-863-g15fd552d186c + dnl # dma-buf: change DMA-buf locking convention v3 + dnl # + AC_KERNEL_TRY_COMPILE([ + #include <linux/dma-buf.h> + ], [ + struct dma_buf_ops *dma_buf_ops = NULL; + dma_buf_ops->dynamic_mapping = true; + ],[ + AC_DEFINE(HAVE_DMA_BUF_OPS_DYNAMIC_MAPPING, 1, + [dma_buf->dynamic_mapping is available])
+ ],[ + AC_DEFINE(HAVE_DMA_BUF_OPS_LEGACY, 1, + [dma_buf->dynamic_mapping is not available]) + ]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-chain.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-chain.m4 new file mode 100644 index 0000000000000..f35b2f8d404ca --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-chain.m4 @@ -0,0 +1,53 @@ +dnl # +dnl # v5.13-rc3-1424-g440d0f12b52a +dnl # dma-buf: add dma_fence_chain_alloc/free v3 +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_CHAIN_ALLOC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct dma_fence_chain *chain = NULL; + chain = dma_fence_chain_alloc(); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_CHAIN_ALLOC, 1, + [dma_fence_chain_alloc() is available]) + ]) + ]) +]) + +dnl # +dnl # v5.0-1331-g7bf60c52e093 +dnl # dma-buf: add new dma_fence_chain container v7 +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_CHAIN_STRUCT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct dma_fence_chain *chain; + chain = NULL; + ], [ + AC_DEFINE(HAVE_STRUCT_DMA_FENCE_CHAIN, 1, + [struct dma_fence_chain is available]) + ]) + ]) +]) + +dnl # +dnl # v5.17-rc2-233-g18f5fad275ef +dnl # dma-buf: add dma_fence_chain_contained helper +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_CHAIN_CONTAINED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + dma_fence_chain_contained(NULL); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_CHAIN_CONTAINED, 1, + [dma_fence_chain_contained() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-describe.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-describe.m4 new file mode 100644 index 0000000000000..e82d65e149645 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-describe.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.15-rc2-1312-ga25efb3863d0 +dnl # dma-buf: add dma_fence_describe and dma_resv_describe v2 +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_DESCRIBE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + dma_fence_describe(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_DESCRIBE, 1, + [dma_fence_describe() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-container.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-container.m4 new file mode 100644 index 0000000000000..7c07948aa5205 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-container.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # commit v5.17-rc2-229-g976b6d97c623 +dnl # dma-buf: consolidate dma_fence subclass checking +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_IS_CONTAINER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + dma_fence_is_container(NULL); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_IS_CONTAINER, 1, [dma_fence_is_container() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-later.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-later.m4 new file mode 100644 index 0000000000000..0523264d08807 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-is-later.m4 @@ -0,0 +1,46 @@ +dnl # +dnl # v5.1-rc2-1115-g5e498abf1485 +dnl # dma-buf: explicitely note that dma-fence-chains use 64bit seqno +dnl # +AC_DEFUN([AC_AMDGPU__DMA_FENCE_IS_LATER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + const struct dma_fence_ops *ops = NULL; + __dma_fence_is_later(0, 0, ops); + ], [ + AC_DEFINE(HAVE__DMA_FENCE_IS_LATER_WITH_OPS_ARG, 1, + [__dma_fence_is_later() is available and has ops arg]) + ], [ + dnl # + dnl # v4.20-rc4-931-gb312d8ca3a7c + dnl # dma-buf: make fence 
sequence numbers 64 bit v2 + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + __dma_fence_is_later(0, 0); + ], [ + AC_DEFINE(HAVE__DMA_FENCE_IS_LATER_2ARGS, 1, + [__dma_fence_is_later() is available and has 2 args]) + ]) + ]) + ]) +]) + +dnl # +dnl # v6.7-rc1-17-g95ba893c9f4f +dnl # dma-buf: fix check in dma_resv_add_fence +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_IS_LATER_OR_SAME], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + dma_fence_is_later_or_same(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_IS_LATER_OR_SAME, 1, [dma_fence_is_later_or_same() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-ops.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-ops.m4 new file mode 100644 index 0000000000000..5fd3aeec58e80 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-ops.m4 @@ -0,0 +1,38 @@ +dnl # +dnl # v5.1-rc2-1115-g5e498abf1485 +dnl # dma-buf: explicitely note that dma-fence-chains use 64bit seqno +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_OPS_USE_64BIT_SEQNO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct dma_fence_ops *ops = NULL; + ops->use_64bit_seqno = false; + ], [ + AC_DEFINE(HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO, 1, + [struct dma_fence_ops has use_64bit_seqno field]) + ]) + ]) +]) + + +dnl # +dnl # v6.3-rc2-1-gaec11c8d7cb3 +dnl # dma-buf/dma-fence: Add deadline awareness +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_OPS_SET_DEADLINE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ktime_t deadline = 0; + struct dma_fence_ops *ops = NULL; + ops->set_deadline(NULL, deadline); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_OPS_SET_DEADLINE, 1, + [struct dma_fence_ops has callback set_deadline]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-fence-timestamp.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-fence-timestamp.m4 new file mode 100644 index 0000000000000..8054979dd1b6a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-fence-timestamp.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v6.6-rc1-33-gb83ce9cb4a46 +dnl # dma-buf: add dma_fence_timestamp helper +dnl # +AC_DEFUN([AC_AMDGPU_DMA_FENCE_TIMESTAMP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ktime_t time; + time = dma_fence_timestamp(NULL); + ], [ + AC_DEFINE(HAVE_DMA_FENCE_TIMESTAMP, 1, [dma_fence_TIMESTAMP() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/dma-resv.m4 b/drivers/gpu/drm/amd/dkms/m4/dma-resv.m4 new file mode 100644 index 0000000000000..8f83620935f38 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma-resv.m4 @@ -0,0 +1,71 @@ +dnl # +dnl # v5.18-rc1-237-g047a1b877ed4 +dnl # dma-buf & drm/amdgpu: remove dma_resv workaround +dnl # +AC_DEFUN([AC_AMDGPU_DMA_RESV_FENCES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct dma_resv *resv = NULL; + resv->fences = NULL; + ], [ + dnl # this is the latest kernel + AC_DEFINE(HAVE_DMA_RESV_FENCES, 1,[dma_resv->fences is available]) + ], [ + dnl # + dnl # v5.8-rc6-36-gcd29f22019ec dma-buf: Use sequence counter with associated wound/wait mutex + dnl # v5.8-rc6-35-g318ce71f3e3a dma-buf: Remove custom seqcount lockdep class key + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct dma_resv *obj = NULL; + seqcount_ww_mutex_init(&obj->seq, &obj->lock); + ], [ + AC_DEFINE(HAVE_DMA_RESV_SEQCOUNT_WW_MUTEX_T, 1, [dma_resv->seq is seqcount_ww_mutex_t]) + ], [ + dnl # + dnl # v5.3-rc1-476-gb016cd6ed4b7 dma-buf: Restore seqlock around dma_resv updates + dnl # v5.3-rc1-449-g52791eeec1d9 dma-buf: 
rename reservation_object to dma_resv + dnl # v5.3-rc1-448-g5d344f58da76 dma-buf: nuke reservation_object seq number + dnl # + AC_KERNEL_TRY_COMPILE([ + #ifdef HAVE_LINUX_DMA_RESV_H + #include + #else + #include + #endif + ], [ + #ifdef HAVE_LINUX_DMA_RESV_H + struct dma_resv *resv = NULL; + #else + struct reservation_object *resv = NULL; + #endif + write_seqcount_begin(&resv->seq); + ], [ + AC_DEFINE(HAVE_DMA_RESV_SEQ, 1,[dma_resv->seq is available]) + ],[ + dnl # + dnl # Trigger the bug for dma_resv->seq definition + dnl # + AC_DEFINE(HAVE_DMA_RESV_SEQ_BUG, 1, [Reporting dma_resv->seq bug]) + ]) + ]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DMA_RESV_RESERVATION_WW_CLASS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_CHECK_SYMBOL_EXPORT([reservation_ww_class],[drivers/dma-buf/dma-resv.c], + [],[ + AC_DEFINE(HAVE_RESERVATION_WW_CLASS_BUG, 1, [Reporting reservation_ww_class missing]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DMA_RESV], [ + AC_AMDGPU_DMA_RESV_FENCES + AC_AMDGPU_DMA_RESV_RESERVATION_WW_CLASS +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/dma_buf_is_dynamic.m4 b/drivers/gpu/drm/amd/dkms/m4/dma_buf_is_dynamic.m4 new file mode 100644 index 0000000000000..0b5844f8c43dc --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma_buf_is_dynamic.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.4-rc4-863-g15fd552d186c +dnl # dma-buf: change DMA-buf locking convention v3 +dnl # +AC_DEFUN([AC_AMDGPU_DMA_BUF_IS_DYNAMIC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + dma_buf_is_dynamic(NULL); + ],[ + AC_DEFINE(HAVE_DMA_BUF_IS_DYNAMIC, 1, + [dma_buf_is_dynamic() is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/dma_map_sgtable.m4 b/drivers/gpu/drm/amd/dkms/m4/dma_map_sgtable.m4 new file mode 100644 index 0000000000000..09d1275020880 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/dma_map_sgtable.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # v5.7-rc5-32-gd9d200bcebc1 +dnl # dma-mapping: add generic helpers for mapping sgtable objects +dnl # +AC_DEFUN([AC_AMDGPU_DMA_MAP_SGTABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + dma_map_sgtable(NULL, NULL, 0, 0); + ], [ + AC_DEFINE(HAVE_DMA_MAP_SGTABLE, 1, + [dma_map_sgtable() is enabled]) + ] + dnl # + dnl # v4.7-11546-g00085f1efa38 + dnl # dma-mapping: use unsigned long for dma_attrs + dnl # leverage test for linux/dma-attrs.h + ) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/down-read-killable.m4 b/drivers/gpu/drm/amd/dkms/m4/down-read-killable.m4 new file mode 100644 index 0000000000000..7a74bd4d25889 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/down-read-killable.m4 @@ -0,0 +1,17 @@ +#dnl +#dnl commit v4.14-rc4-65-g76f8507f7a64 +#dnl locking/rwsem: Add down_read_killable() +#dnl +AC_DEFUN([AC_AMDGPU_DOWN_READ_KILLABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + int ret; + ret = down_read_killable(NULL); + ],[down_read_killable], [kernel/locking/rwsem.c],[ + AC_DEFINE(HAVE_DOWN_READ_KILLABLE, 1, + [down_read_killable() is available])] + ) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/down-write-killable.m4 b/drivers/gpu/drm/amd/dkms/m4/down-write-killable.m4 new file mode 100644 index 0000000000000..c048731800f48 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/down-write-killable.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit 916633a403702549d37ea353e63a68e5b0dc27ad +dnl # locking/rwsem: Provide down_write_killable() +dnl # +AC_DEFUN([AC_AMDGPU_DOWN_WRITE_KILLABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + 
#include
+	], [
+		int ret;
+		ret = down_write_killable(NULL);
+	], [down_write_killable],[kernel/locking/rwsem.c],[
+		AC_DEFINE(HAVE_DOWN_WRITE_KILLABLE, 1,
+			[down_write_killable() is available])
+	])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-aperture-remove-conflicting-pci-framebuffers.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-aperture-remove-conflicting-pci-framebuffers.m4
new file mode 100644
index 0000000000000..50cfd872b53b3
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-aperture-remove-conflicting-pci-framebuffers.m4
@@ -0,0 +1,18 @@
+dnl #
+dnl # commit v5.13-rc3-1543-g97c9bfe3f660
+dnl # drm/aperture: Pass DRM driver structure instead of driver name
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_DRM_DRIVER_ARG], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+			struct drm_driver;
+		], [
+			const struct drm_driver *drv = NULL;
+			drm_aperture_remove_conflicting_pci_framebuffers(NULL, drv);
+		], [
+			AC_DEFINE(HAVE_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_DRM_DRIVER_ARG, 1,
+				[drm_aperture_remove_conflicting_pci_framebuffers() second arg is drm_driver*])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-atomic-state.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-atomic-state.m4
new file mode 100644
index 0000000000000..cd7227d5c12eb
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-atomic-state.m4
@@ -0,0 +1,18 @@
+dnl #
+dnl # v5.0-rc1-415-g022debad063e
+dnl # drm/atomic: Add drm_atomic_state->duplicated
+dnl #
+AC_DEFUN([AC_AMDGPU_STRUCT_DRM_ATOMIC_STATE_DUPLICATED], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		],[
+			struct drm_atomic_state *state = NULL;
+			state->duplicated = 0;
+		],[
+			AC_DEFINE(HAVE_STRUCT_DRM_ATOMIC_STATE_DUPLICATED, 1,
+				[struct drm_atomic_state->duplicated is available])
+		])
+	])
+])
+
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-bitmap-functions.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-bitmap-functions.m4
new file mode 100644
index 0000000000000..c5a71aac5af1e
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-bitmap-functions.m4
@@ -0,0 +1,20 @@
+dnl #
+dnl # commit c42b65e363ce introduced this change
+dnl # v4.17-3-gc42b65e363ce
+dnl # bitmap: Add bitmap_alloc(), bitmap_zalloc() and bitmap_free()
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_BITMAP_FUNCS], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		],[
+			bitmap_free(NULL);
+			bitmap_alloc(0, 0);
+			bitmap_zalloc(0, 0);
+		],[
+			AC_DEFINE(HAVE_BITMAP_FUNCS,
+				1,
+				[bitmap_free(), bitmap_alloc() and bitmap_zalloc() are available])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-cache.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-cache.m4
new file mode 100644
index 0000000000000..42911c2cc2131
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-cache.m4
@@ -0,0 +1,16 @@
+dnl #
+dnl # commit 913b2cb727b7a47ccf8842d54c89f1b873c6deed
+dnl # drm: change func to better detect wether swiotlb is needed
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_CACHE], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		], [
+			drm_need_swiotlb(0);
+		], [
+			AC_DEFINE(HAVE_DRM_NEED_SWIOTLB, 1,
+				[drm_need_swiotlb() is available])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-atomic-hdr-metadata-equal.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-atomic-hdr-metadata-equal.m4
new file mode 100644
index 0000000000000..211e6fdd63702
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-atomic-hdr-metadata-equal.m4
@@ -0,0 +1,16 @@
+dnl #
+dnl # commit v5.12-rc7-1582-g72921cdf8ac2
+dnl # drm/connector: Add
helper to compare HDR metadata +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_connector_atomic_hdr_metadata_equal(NULL, NULL); + ], [drm_connector_atomic_hdr_metadata_equal], [drivers/gpu/drm/drm_connector.c], [ + AC_DEFINE(HAVE_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL, 1, + [drm_connector_atomic_hdr_metadata_equal() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-attach-hdr-output-metadata-property.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-attach-hdr-output-metadata-property.m4 new file mode 100644 index 0000000000000..7ea380c7d60eb --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-attach-hdr-output-metadata-property.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.12-rc7-1581-ge057b52c1d90 +dnl # drm/connector: Create a helper to attach the hdr_output_metadata property +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_connector_attach_hdr_output_metadata_property(NULL); + ], [drm_connector_attach_hdr_output_metadata_property], [drivers/gpu/drm/drm_connector.c], [ + AC_DEFINE(HAVE_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY, 1, + [drm_connector_attach_hdr_output_metadata_property() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-display-info-hdmi.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-display-info-hdmi.m4 new file mode 100644 index 0000000000000..7c3454c843d50 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-display-info-hdmi.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # dfbe9bf0 introduce this change +dnl # drm/amdgpu: replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi +dnl # v5.13-3121-gdfbe9bf067a2 +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO_IS_HDMI], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_display_info *display_info = NULL; + display_info->is_hdmi = 0; + ], [ + AC_DEFINE(HAVE_DRM_DISPLAY_INFO_IS_HDMI, 1, + [display_info->is_hdmi is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-for-each-possible-encoder.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-for-each-possible-encoder.m4 new file mode 100644 index 0000000000000..0f57128fcf1a1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-for-each-possible-encoder.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # commit v5.3-rc1-656-g62afb4ad425a +dnl # drm/connector: Allow max possible encoders to attach to a connector +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + struct drm_connector *connector = NULL; + struct drm_encoder *encoder = NULL; + drm_connector_for_each_possible_encoder(connector, encoder) + return 0; + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER_2ARGS, 1, + [drm_connector_for_each_possible_encoder() wants 2 arguments]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-helper-funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-helper-funcs.m4 new file mode 100644 index 0000000000000..c31a4f9b86b56 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-helper-funcs.m4 @@ -0,0 +1,42 @@ +dnl # +dnl # commit v5.2-rc2-529-g6f3b62781bbd +dnl # drm: Convert connector_helper_funcs->atomic_check to accept drm_atomic_state +dnl # 
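+dnl # A driver shim would typically switch the callback prototype on the
+dnl # define below, e.g. (illustrative sketch only; function name hypothetical):
+dnl #
+dnl #	#ifdef HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE
+dnl #	static int conn_atomic_check(struct drm_connector *connector,
+dnl #				     struct drm_atomic_state *state);
+dnl #	#else
+dnl #	static int conn_atomic_check(struct drm_connector *connector,
+dnl #				     struct drm_connector_state *state);
+dnl #	#endif
+dnl #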
+AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_connector_helper_funcs *p = NULL; + p->atomic_check(NULL, (struct drm_atomic_state*)NULL); + ], [ + AC_DEFINE(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE, 1, + [drm_connector_helper_funcs->atomic_check() wants struct drm_atomic_state arg]) + ]) + ]) +]) + +dnl # +dnl # v5.10-rc3-1075-geca22edb37d2 +dnl # drm: Pass the full state to connectors atomic functions +dnl # +AC_DEFUN([AC_AMDGPU_CONNECTOR_HELPER_FUNCTS_ATOMIC_BEST_ENCODER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_connector_helper_funcs *p = NULL; + p->atomic_best_encoder(NULL, (struct drm_atomic_state*)NULL); + ], [ + AC_DEFINE(HAVE_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_BEST_ENCODER_ARG_DRM_ATOMIC_STATE, 1, + [atomic_best_encoder take 2nd arg type of state as struct drm_atomic_state]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS], [ + AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS_ATOMIC_CHECK + AC_AMDGPU_CONNECTOR_HELPER_FUNCTS_ATOMIC_BEST_ENCODER +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-init-with-ddc.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-init-with-ddc.m4 new file mode 100644 index 0000000000000..9af2f9f8226b4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-init-with-ddc.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.3-rc1-330-g100163df4203 +dnl # drm: Add drm_connector_init() variant with ddc +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_INIT_WITH_DDC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_connector_init_with_ddc(NULL, NULL, NULL, 0, NULL); + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_INIT_WITH_DDC, 1, + [drm_connector_init_with_ddc() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-set-panel-orientation-with-quirk.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-set-panel-orientation-with-quirk.m4 new file mode 100644 index 0000000000000..0ae5de382dec8 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-set-panel-orientation-with-quirk.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.5-rc2-1360-g69654c632d80 +dnl # drm/connector: Split out orientation quirk detection (v2) +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_connector_set_panel_orientation_with_quirk(NULL, 0, 0, 0); + ],[drm_connector_set_panel_orientation_with_quirk], [drivers/gpu/drm/drm_connector.c], [ + AC_DEFINE(HAVE_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK, 1, + [drm_connector_set_panel_orientation_with_quirk() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdcp-content-type.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdcp-content-type.m4 new file mode 100644 index 0000000000000..6d852b75d9e05 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdcp-content-type.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.3-rc1-377-g7672dbba85d3 +dnl # drm: Add Content protection type property +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_state *state = NULL; + state->hdcp_content_type = 0; + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE, 1, + [struct drm_connector_state has hdcp_content_type member]) + ]) + ]) +]) diff 
--git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdr-output-metadata.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdr-output-metadata.m4 new file mode 100644 index 0000000000000..d79d5c876e35b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state-hdr-output-metadata.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.1-rc5-1688-gfbb5d0353c62 +dnl # drm: Add HDR source metadata property +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_STATE_HDR_OUTPUT_METADATA], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_state *state = NULL; + state->hdr_output_metadata = NULL; + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_STATE_HDR_OUTPUT_METADATA, 1, + [struct drm_connector_state has hdr_output_metadata member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector-state.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state.m4 new file mode 100644 index 0000000000000..69753da371891 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector-state.m4 @@ -0,0 +1,26 @@ +dnl # +dnl # v4.20-rc3-425-g1398958cfd8d +dnl # drm: Add vrr_enabled property to drm CRTC +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_state *state = NULL; + state->colorspace = 0; + ],[ + AC_DEFINE(HAVE_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE, 1, + [struct drm_connector_state->colorspace is available]) + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_state *state = NULL; + state->self_refresh_aware = 0; + ],[ + AC_DEFINE(HAVE_STRUCT_DRM_CONNECTOR_STATE_SELF_REFRESH_AWARE, 1, + [struct drm_connector_state->self_refresh_aware is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-connector.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-connector.m4 new file mode 100644 index 0000000000000..653c3e1e1ec98 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-connector.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # v6.1-rc1-146-g90b575f52c6a +dnl # drm/edid: detach debugfs EDID override from EDID property update +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_EDID_OVERRIDE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector *connector = NULL; + connector->edid_override = NULL; + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_EDID_OVERRIDE, 1, + [drm_connector->edid_override is available]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-crtc-force-disable-all.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-force-disable-all.m4 new file mode 100644 index 0000000000000..68ccba497ae81 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-force-disable-all.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 6a0d95285035c43361c72776b4c618f60c0f4ab4 +dnl # drm: Add helpers to turn off CRTCs +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CRTC_FORCE_DISABLE_ALL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_crtc_force_disable_all(NULL); + ], [drm_crtc_force_disable_all], [drivers/gpu/drm/drm_crtc.c], [ + AC_DEFINE(HAVE_DRM_CRTC_FORCE_DISABLE_ALL, 1, + [drm_crtc_force_disable_all() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-crtc-helper-funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-helper-funcs.m4 new file mode 100644 index 0000000000000..5bfb416a8ed5e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-helper-funcs.m4 @@ -0,0 +1,45 @@ +dnl # +dnl # v5.10-rc2-260-g29b77ad7b9ca +dnl # drm/atomic: Pass the full state to CRTC atomic_check +dnl +dnl # 
v5.10-rc2-261-gf6ebe9f9c923 +dnl # drm/atomic: Pass the full state to CRTC atomic begin and flush +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_crtc_helper_funcs *p = NULL; + p->atomic_check(NULL, (struct drm_atomic_state*)NULL); + ], [ + AC_DEFINE(HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK_ARG_DRM_ATOMIC_STATE, 1, + [drm_crtc_helper_funcs->atomic_check()/atomic_flush()/atomic_begin() wants struct drm_atomic_state arg]) + ]) + ]) +]) + +dnl # +dnl # v5.9-rc5-1161-g351f950db4ab +dnl # drm/atomic: Pass the full state to CRTC atomic enable/disable +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_crtc_helper_funcs *p = NULL; + p->atomic_enable(NULL, (struct drm_atomic_state*)NULL); + ], [ + AC_DEFINE(HAVE_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE_ARG_DRM_ATOMIC_STATE, 1, + [drm_crtc_helper_funcs->atomic_enable()/atomic_disable() wants struct drm_atomic_state arg]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_CRTC_HELPER_FUNCS], [ + AC_AMDGPU_DRM_CRTC_HELPER_FUNCS_ATOMIC_CHECK + AC_AMDGPU_DRM_CRTC_HELPER_FUNCS_ATOMIC_ENABLE +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-crtc-state.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-state.m4 new file mode 100644 index 0000000000000..cc6860f55c68e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-crtc-state.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.3-rc3-2032-g4d85f45c73a2 +dnl # drm/atomic: Rename crtc_state->pageflip_flags to async_flip +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CRTC_STATE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_crtc_state *crtc_state = NULL; + crtc_state->async_flip = 0; + ],[ + AC_DEFINE(HAVE_STRUCT_DRM_CRTC_STATE_ASYNC_FLIP, 1, + [struct drm_crtc_state->async_flip is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-device-pdev.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-device-pdev.m4 new file mode 100644 index 0000000000000..7bbbd70aab907 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-device-pdev.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit b347e04452ff6382ace8fba9c81f5bcb63be17a6 +dnl # drm: Remove pdev field from struct drm_device +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DEVICE_PDEV], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_device *pdd = NULL; + pdd->pdev = NULL; + ], [ + AC_DEFINE(HAVE_DRM_DEVICE_PDEV, 1, [struct drm_device has pdev member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-display-info.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-display-info.m4 new file mode 100644 index 0000000000000..ad9ebac619d6e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-display-info.m4 @@ -0,0 +1,82 @@ +dnl # +dnl # commit v4.9-rc1-522171951761153172c75b94ae1f4bc9ab631745 +dnl # drm: Extract drm_connector.[hc] +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO_EDID_HDMI_RGB444_DC_MODES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_display_info *display_info = NULL; + display_info->edid_hdmi_rgb444_dc_modes = 0; + ], [ + AC_DEFINE(HAVE_DRM_DISPLAY_INFO_EDID_HDMI_RGB444_DC_MODES, 1, + [display_info->edid_hdmi_rgb444_dc_modes is available]) + ]) + ]) +]) + + +dnl # +dnl # commit v5.6-rc2-1062-ga1d11d1efe4d +dnl # drm/edid: Add function to parse EDID descriptors for monitor range +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO_MONITOR_RANGE], [ + AC_KERNEL_DO_BACKGROUND([ 
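+		dnl # The monitor_range member carries the EDID-declared vertical
+		dnl # refresh range; guarded access would look like (sketch only):
+		dnl #
+		dnl #	#ifdef HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE
+		dnl #		min_vfreq = connector->display_info.monitor_range.min_vfreq;
+		dnl #	#endif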
+ AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_display_info *info = NULL; + info->monitor_range.min_vfreq=0; + info->monitor_range.max_vfreq=0; + ],[ + AC_DEFINE(HAVE_DRM_DISPLAY_INFO_MONITOR_RANGE, 1, + [struct drm_display_info has monitor_range member]) + ]) + ]) +]) + +dnl # +dnl # commit v5.18-3347-g721ed0ae5acf +dnl # drm/edid: add a quirk for two LG monitors to get them to work on 10bpc +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO_MAX_DSC_BPP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_display_info *display_info = NULL; + display_info->max_dsc_bpp=0; + ],[ + AC_DEFINE(HAVE_DRM_DISPLAY_INFO_MAX_DSC_BPP, 1, + [display_info->max_dsc_bpp is available]) + ]) + ]) +]) + +dnl # +dnl # commit v6.1-rc1~27-a61bb3422e8d +dnl # drm/amdgpu_dm: Rely on split out luminance calculation function +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO_LUMINANCE_RANGE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_display_info *display_info = NULL; + struct drm_luminance_range_info *luminance_range; + luminance_range = &display_info->luminance_range; + ],[ + AC_DEFINE(HAVE_DRM_DISPLAY_INFO_LUMINANCE_RANGE, 1, + [display_info->luminance_range is available]) + ]) + ]) +]) + + +AC_DEFUN([AC_AMDGPU_DRM_DISPLAY_INFO], [ + AC_AMDGPU_DRM_DISPLAY_INFO_EDID_HDMI_RGB444_DC_MODES + AC_AMDGPU_DRM_DISPLAY_INFO_MONITOR_RANGE + AC_AMDGPU_DRM_DISPLAY_INFO_MAX_DSC_BPP + AC_AMDGPU_DRM_DISPLAY_INFO_LUMINANCE_RANGE +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-atomic-funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-atomic-funcs.m4 new file mode 100644 index 0000000000000..c0441f45a7861 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-atomic-funcs.m4 @@ -0,0 +1,134 @@ +dnl # +dnl # commit edb1ed1ab7d314e114de84003f763da34c0f34c0 +dnl # drm/dp: Add DP MST helpers to atomically find and release vcpi slots +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_FIND_VCPI_SLOTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int retval; + retval = drm_dp_atomic_find_vcpi_slots(NULL, NULL, NULL, 0, 0); + ], [ + AC_DEFINE(HAVE_DRM_DP_ATOMIC_FIND_VCPI_SLOTS_5ARGS, 1, + [drm_dp_atomic_find_vcpi_slots() wants 5args]) + ]) + ]) +]) + + +dnl # +dnl # commit v5.19-rc6-1758-gdf78f7f660cd +dnl # drm/display/dp_mst: Call them time slots, not VCPI slots +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_RELEASE_VCPI_SLOTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + int ret; + ret = drm_dp_atomic_release_time_slots(NULL, NULL, NULL); + ],[ + AC_DEFINE(HAVE_DRM_DP_ATOMIC_RELEASE_TIME_SLOTS, 1, + [drm_dp_atomic_release_time_slots() is available]) + ],[ + dnl # + dnl # commit v4.20-rc4-1031-geceae1472467 + dnl # drm/dp_mst: Start tracking per-port VCPI allocations + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + int ret; + struct drm_dp_mst_port *port = NULL; + ret = drm_dp_atomic_release_vcpi_slots(NULL, NULL, port); + ],[ + AC_DEFINE(HAVE_DRM_DP_ATOMIC_RELEASE_VCPI_SLOTS_MST_PORT, 1, + [drm_dp_atomic_release_vcpi_slots() with drm_dp_mst_port argument is available]) + ]) + ]) + ]) +]) + + +dnl # +dnl # commit v5.19-rc6-1758-gdf78f7f660cd +dnl # drm/display/dp_mst: Call them time slots, not VCPI slots +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_FIND_TIME_SLOTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + int ret; + ret = drm_dp_atomic_find_time_slots(NULL, NULL, NULL, 0); + ],[ + AC_DEFINE(HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS, 1, + [drm_dp_atomic_find_time_slots() is available]) + 
])
+	])
+])
+
+dnl #
+dnl # commit v6.1-rc1~27-a5c2c0d164e9
+dnl # drm/display/dp_mst: Add nonblocking helpers for DP MST
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_SETUP_COMMIT], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		],[
+			int ret;
+			ret = drm_dp_mst_atomic_setup_commit(NULL);
+		],[
+			AC_DEFINE(HAVE_DRM_DP_ATOMIC_SETUP_COMMIT, 1,
+				[drm_dp_mst_atomic_setup_commit() is available])
+		])
+	])
+])
+
+dnl #
+dnl # commit v6.1-rc1~27-a5c2c0d164e9
+dnl # drm/display/dp_mst: Add nonblocking helpers for DP MST
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_WAIT_FOR_DEPENDENCIES], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		],[
+			drm_dp_mst_atomic_wait_for_dependencies(NULL);
+		],[
+			AC_DEFINE(HAVE_DRM_DP_ATOMIC_WAIT_FOR_DEPENDENCIES, 1,
+				[drm_dp_mst_atomic_wait_for_dependencies() is available])
+		])
+	])
+])
+
+dnl #
+dnl # commit v6.1-rc1~27-a5c2c0d164e9
+dnl # drm/display/dp_mst: Add nonblocking helpers for DP MST
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		],[
+			int ret;
+			ret = drm_dp_mst_root_conn_atomic_check(NULL, NULL);
+		],[
+			AC_DEFINE(HAVE_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK, 1,
+				[drm_dp_mst_root_conn_atomic_check() is available])
+		])
+	])
+])
+
+AC_DEFUN([AC_AMDGPU_DRM_DP_ATOMIC_FUNCS], [
+	AC_AMDGPU_DRM_DP_ATOMIC_FIND_VCPI_SLOTS
+	AC_AMDGPU_DRM_DP_ATOMIC_RELEASE_VCPI_SLOTS
+	AC_AMDGPU_DRM_DP_ATOMIC_FIND_TIME_SLOTS
+	AC_AMDGPU_DRM_DP_ATOMIC_SETUP_COMMIT
+	AC_AMDGPU_DRM_DP_ATOMIC_WAIT_FOR_DEPENDENCIES
+	AC_AMDGPU_DRM_DP_MST_ROOT_CONN_ATOMIC_CHECK
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-aux-drm-dev.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-aux-drm-dev.m4
new file mode 100644
index 0000000000000..cef22c56ee51c
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-aux-drm-dev.m4
@@ -0,0 +1,17 @@
+dnl #
+dnl # commit v5.12-rc7-1495-g6cba3fe43341
+dnl # drm/dp: Add backpointer to drm_device in drm_dp_aux
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_DP_AUX_DRM_DEV], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE_SYMBOL([
+			#include
+		], [
+			struct drm_dp_aux dda;
+			dda.drm_dev = NULL;
+		], [],[],[
+			AC_DEFINE(HAVE_DRM_DP_AUX_DRM_DEV, 1,
+				[struct drm_dp_aux has member named 'drm_dev'])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-calc-pbn-mode.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-calc-pbn-mode.m4
new file mode 100644
index 0000000000000..7261c98f40b18
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-calc-pbn-mode.m4
@@ -0,0 +1,19 @@
+dnl #
+dnl # commit v5.5-rc2-902-gdc48529fb14e
+dnl # drm/dp_mst: Add PBN calculation for DSC modes
+dnl #
+dnl # v6.6-rc2-668-g7707dd602259
+dnl # drm/dp_mst: Fix fractional DSC bpp handling
+dnl #
+AC_DEFUN([AC_AMDGPU_DRM_DP_CALC_PBN_MODE], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#include
+		], [
+			drm_dp_calc_pbn_mode(0, 0, 0);
+		], [
+			AC_DEFINE(HAVE_DRM_DP_CALC_PBN_MODE_3ARGS, 1,
+				[drm_dp_calc_pbn_mode() wants 3 args])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-cec-correlation-functions.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-cec-correlation-functions.m4
new file mode 100644
index 0000000000000..3ded1ff2015a2
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-cec-correlation-functions.m4
@@ -0,0 +1,16 @@
+dnl #
+dnl # commit v5.3-rc1-555-gae85b0df124f
+dnl # drm_dp_cec: add connector info support.
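+dnl # Before this commit the helper took (aux, name, parent); afterwards it
+dnl # takes (aux, connector), the p,p form probed here. Guarded usage would
+dnl # look roughly like (illustrative sketch only):
+dnl #
+dnl #	#ifdef HAVE_DRM_DP_CEC_REGISTER_CONNECTOR_PP
+dnl #		drm_dp_cec_register_connector(aux, connector);
+dnl #	#else
+dnl #		drm_dp_cec_register_connector(aux, connector->name, dev);
+dnl #	#endif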
+dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_CEC_CORRELATION_FUNCTIONS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_cec_register_connector(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_CEC_REGISTER_CONNECTOR_PP, 1, + [drm_dp_cec_register_connector() wants p,p interface]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-channel-eq-delay.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-channel-eq-delay.m4 new file mode 100644 index 0000000000000..32db29f7c3936 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-channel-eq-delay.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v5.12-rc7-1498-g0c4fada608c1 +dnl # drm/dp: Pass drm_dp_aux to drm_dp*_link_train_channel_eq_delay() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_LINK_TRAIN_CHANNEL_EQ_DELAY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + struct drm_dp_aux *aux = NULL; + const u8 dpcd[DP_RECEIVER_CAP_SIZE]; + drm_dp_link_train_channel_eq_delay(aux, dpcd); + ], [drm_dp_link_train_channel_eq_delay],[drivers/gpu/drm/drm_dp_helper.c drivers/gpu/drm/display/drm_dp_helper.c],[ + AC_DEFINE(HAVE_DRM_DP_LINK_TRAIN_CHANNEL_EQ_DELAY_2ARGS, 1, + [drm_dp_link_train_channel_eq_delay() has 2 args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-clock-recovery-delay.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-clock-recovery-delay.m4 new file mode 100644 index 0000000000000..327cd21b0200c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-link-train-clock-recovery-delay.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v5.12-rc7-1497-g9e9866664456 +dnl # drm/dp: Pass drm_dp_aux to drm_dp_link_train_clock_recovery_delay() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_LINK_TRAIN_CLOCK_RECOVERY_DELAY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + struct drm_dp_aux *aux = NULL; + const u8 dpcd[DP_RECEIVER_CAP_SIZE]; + drm_dp_link_train_clock_recovery_delay(aux, dpcd); + ], [drm_dp_link_train_clock_recovery_delay],[drivers/gpu/drm/drm_dp_helper.c drivers/gpu/drm/display/drm_dp_helper.c],[ + AC_DEFINE(HAVE_DRM_DP_LINK_TRAIN_CLOCK_RECOVERY_DELAY_2ARGS, 1, + [drm_dp_link_train_clock_recovery_delay() has 2 args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-add-affected-dsc-crtcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-add-affected-dsc-crtcs.m4 new file mode 100644 index 0000000000000..c9127ca8d82c3 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-add-affected-dsc-crtcs.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit 971fb192aaeb4b5086ac3f21d00943a5e1431176 +dnl # drm/dp_mst: Add helper to trigger modeset on affected DSC MST CRTCs +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + int ret; + ret = drm_dp_mst_add_affected_dsc_crtcs(NULL, NULL); + ], [drm_dp_mst_dsc_aux_for_port], [drivers/gpu/drm/drm_dp_mst_topology.c drivers/gpu/drm/display/drm_dp_mst_topology.c], [ + AC_DEFINE(HAVE_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS, 1, + [drm_dp_mst_add_affected_dsc_crtcs() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-check.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-check.m4 new file mode 100644 index 0000000000000..e56284f5a5a2c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-check.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit eceae147246749c6dbaeefda802b30f804a3c54c +dnl # drm/dp_mst: Start tracking per-port VCPI allocations +dnl # 
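+dnl # When the symbol is present, the helper is meant to be called once from
+dnl # the top-level driver atomic_check to validate MST bandwidth, e.g.
+dnl # (illustrative sketch only):
+dnl #
+dnl #	#ifdef HAVE_DRM_DP_MST_ATOMIC_CHECK
+dnl #		ret = drm_dp_mst_atomic_check(state);
+dnl #		if (ret)
+dnl #			return ret;
+dnl #	#endif
+dnl #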
+AC_DEFUN([AC_AMDGPU_DRM_DP_MST_ATOMIC_CHECK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + int ret; + ret = drm_dp_mst_atomic_check(NULL); + ], [drm_dp_mst_atomic_check], [drivers/gpu/drm/drm_dp_mst_topology.c drivers/gpu/drm/display/drm_dp_mst_topology.c], [ + AC_DEFINE(HAVE_DRM_DP_MST_ATOMIC_CHECK, 1, + [drm_dp_mst_atomic_check() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-enable-dsc.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-enable-dsc.m4 new file mode 100644 index 0000000000000..ad901210aeaba --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-atomic-enable-dsc.m4 @@ -0,0 +1,32 @@ +dnl # +dnl # commit 0529a1d385b9ce6cd7498d180f720eeb3f755980 +dnl # drm/dp_mst: Add DSC enablement helpers to DRM +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_ATOMIC_ENABLE_DSC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_dp_mst_atomic_enable_dsc(NULL, NULL, 0, 0, false); + ], [drm_dp_mst_atomic_enable_dsc], [drivers/gpu/drm/drm_dp_mst_topology.c], [ + AC_DEFINE(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC, 1, + [drm_dp_mst_atomic_enable_dsc() is available]) + AC_DEFINE(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC_WITH_5_ARGS, 1, + [drm_dp_mst_atomic_enable_dsc() wants 5args]) + ],[ + dnl # + dnl # commit 4d07b0bc403403438d9cf88450506240c5faf92f + dnl # drm/display/dp_mst: Move all payload info into the atomic state + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int vcpi; + vcpi = drm_dp_mst_atomic_enable_dsc(NULL, NULL, 0, false); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_ATOMIC_ENABLE_DSC, 1, + [drm_dp_atomic_find_vcpi_slots() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-detect-port.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-detect-port.m4 new file mode 100644 index 0000000000000..4198140ed6a0e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-detect-port.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.4-rc4-752-g3f9b3f02dda5 +dnl # drm/dp_mst: Protect drm_dp_mst_port members with locking +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_DETECT_PORT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int ret; + ret = drm_dp_mst_detect_port(NULL, NULL, NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_DETECT_PORT_PPPP, 1, + [drm_dp_mst_detect_port() wants p,p,p,p args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-dsc-aux-for-port.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-dsc-aux-for-port.m4 new file mode 100644 index 0000000000000..6d83fb019062d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-dsc-aux-for-port.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit d251c02a2b78245bb32d7909a66b06285f7922a2 +dnl # drm/dp_mst: Add helpers for MST DSC and virtual DPCD aux +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_DSC_AUX_FOR_PORT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_dp_mst_dsc_aux_for_port(NULL); + ], [drm_dp_mst_dsc_aux_for_port], [drivers/gpu/drm/drm_dp_mst_topology.c drivers/gpu/drm/display/drm_dp_mst_topology.c], [ + AC_DEFINE(HAVE_DRM_DP_MST_DSC_AUX_FOR_PORT, 1, + [drm_dp_mst_dsc_aux_for_port() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port-full-pbn.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port-full-pbn.m4 new file mode 100644 index 0000000000000..e45e020edc7fc --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port-full-pbn.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.6-rc5-4-gfcf463807596 +dnl # drm/dp_mst: Use 
full_pbn instead of available_pbn for bandwidth checks +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_PORT_FULL_PBN], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_port *mst_port = NULL; + mst_port->full_pbn = 0; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_PORT_FULL_PBN, 1, + [drm_dp_mst_port struct has full_pbn member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 new file mode 100644 index 0000000000000..c66ffa1496233 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.18-2579-g3af4b1f1d6e7 +dnl # "drm/dp_mst: add passthrough_aux to struct drm_dp_mst_port" +dnl +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_port *dp_mst_port = NULL; + dp_mst_port->passthrough_aux = NULL; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX, 1, + [struct drm_dp_mst_port has passthrough_aux member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-cbs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-cbs.m4 new file mode 100644 index 0000000000000..4e54d919e6a3c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-cbs.m4 @@ -0,0 +1,76 @@ +dnl # +dnl # commit v4.20-rc4-941-g16bff572cc66 +dnl # drm/dp-mst-helper: Remove hotplug callback +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_cbs *dp_mst_cbs = NULL; + dp_mst_cbs->hotplug(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG, 1, + [struct drm_dp_mst_topology_cbs has hotplug member]) + ]) +]) + + +dnl # +dnl # commit v5.6-rc2-1065-ga5c4dc165957 +dnl # drm/dp_mst: Remove register_connector callback +dnl # +dnl # commit v4.3-rc3-39-gd9515c5ec1a2 +dnl # drm/dp/mst: split connector registration into two parts (v2) +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_cbs *dp_mst_cbs = NULL; + dp_mst_cbs->register_connector(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR, 1, + [struct drm_dp_mst_topology_cbs->register_connector is available]) + ]) +]) + +dnl # +dnl # commit v5.6-rc5-1703-g72dc0f515913 +dnl # drm/dp_mst: Remove drm_dp_mst_topology_cbs.destroy_connector +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_cbs *dp_mst_cbs = NULL; + dp_mst_cbs->destroy_connector(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR, 1, + [struct drm_dp_mst_topology_cbs->destroy_connector is available]) + ]) +]) + +dnl # +dnl # commit v5.7-rc1-646-g471bdd0df0d5 +dnl # drm/i915/dp_mst: Work around out-of-spec adapters filtering short pulses +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_cbs *dp_mst_cbs = NULL; + dp_mst_cbs->poll_hpd_irq(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ, 1, + [struct drm_dp_mst_topology_cbs->poll_hpd_irq is available]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_HOTPLUG + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_REGISTER_CONNECTOR + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR + 
AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS_POLL_HPD_IRQ + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-mgr-resume.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-mgr-resume.m4 new file mode 100644 index 0000000000000..3c491e182062e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-mgr-resume.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v5.4-rc4-759-g6f85f73821f6 +dnl # drm/dp_mst: Add basic topology reprobing when resuming +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_RESUME], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int ret; + ret = drm_dp_mst_topology_mgr_resume(NULL, 0); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_MGR_RESUME_2ARGS, 1, + [drm_dp_mst_topology_mgr_resume() wants 2 args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-state.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-state.m4 new file mode 100644 index 0000000000000..d5928adc09844 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology-state.m4 @@ -0,0 +1,89 @@ +dnl # +dnl # commit v4.11-rc7-1869-g3f3353b7e121 +dnl # drm/dp: Introduce MST topology state to track available link bandwidth +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_TOTAL_AVAIL_SLOTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_state * mst_state = NULL; + mst_state->total_avail_slots = 0; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_STATE_TOTAL_AVAIL_SLOTS, 1, + [struct drm_dp_mst_topology_state has member total_avail_slots]) + ]) + ]) +]) + + +dnl # +dnl # commit 8366f01fb15a54281c193658d1a916f6f2d5eb1e +dnl # drm/display/dp_mst: Move all payload info into the atomic state +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_state * mst_state = NULL; + struct list_head payloads; + payloads = mst_state->payloads; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS, 1, + [struct drm_dp_mst_topology_state has member payloads]) + ]) + ]) +]) + + +dnl # +dnl # commit v5.19-rc6-1771-g4d07b0bc4034 +dnl # drm/display/dp_mst: Move all payload info into the atomic state +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_state * mst_state = NULL; + mst_state->pbn_div = 0; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_INT, 1, + [struct drm_dp_mst_topology_state has member pbn_div]) + ]) + ]) + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dp_mst_topology_state * mst_state = NULL; + fixed20_12 pbn_div; + pbn_div = mst_state->pbn_div; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV_UNION, 1, + [struct drm_dp_mst_topology_state has union member pbn_div]) + ]) + ]) +]) + +dnl # +dnl # commit v6.9-rc6-1554-g8a0a7b98d4b6 +dnl # drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2 +dnl # +dnl # commit v5.19-rc6-1771-g4d07b0bc4034 +dnl # drm/display/dp_mst: Move all payload info into the atomic state +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_ADD_PAYLOAD_PART2_THREE_ARGUMENTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int a = 0; + a = drm_dp_add_payload_part2(NULL, NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_ADD_PAYLOAD_PART2_THREE_ARGUMENTS, 1, + [drm_dp_add_payload_part2 has three arguments]) + ]) + ]) +]) diff --git 
a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology.m4 new file mode 100644 index 0000000000000..6de9a34aa627d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-topology.m4 @@ -0,0 +1,45 @@ +dnl # +dnl # commit 1e797f556c616a42f1e039b1ff1d3c58f61b6104 +dnl # drm/dp: Split drm_dp_mst_allocate_vcpi +dnl # +dnl # Note: This autoconf only works with compiler flag -Werror +dnl # The interface types are specified in Hungarian notation +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY], [ + dnl # + dnl # commit d25689760b747287c6ca03cfe0729da63e0717f4 + dnl # drm/amdgpu/display: Keep malloc ref to MST port + dnl # + dnl # commit ebcc0e6b509108b4a67daa4c55809a05ab7f4b77 + dnl # drm/dp_mst: Introduce new refcounting scheme for mstbs and ports + dnl # + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_mst_get_port_malloc(NULL); + drm_dp_mst_put_port_malloc(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC, 1, [ + drm_dp_mst_{get,put}_port_malloc() is available]) + ]) + ]) + dnl # + dnl # commit aad0eab4e8dd76d1ba5248f9278633829cbcec38 + dnl # drm/dp_mst: Enable registration of AUX devices for MST ports + dnl # + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_mst_connector_early_unregister(NULL, NULL); + drm_dp_mst_connector_late_register(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_CONNECTOR_EARLY_UNREGISTER, 1, [ + drm_dp_mst_connector_early_unregister() is available]) + AC_DEFINE(HAVE_DRM_DP_MST_CONNECTOR_LATE_REGISTER, 1, [ + drm_dp_mst_connector_late_register() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-remove-payload-part.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-remove-payload-part.m4 new file mode 100644 index 0000000000000..10468731e64f9 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-remove-payload-part.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v6.3-5135-g7c5343f2a753 +dnl # "drm/mst: Refactor the flow for payload allocation/removement" +dnl +AC_DEFUN([AC_AMDGPU_DRM_DP_REMOVE_RAYLOAD_PART], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_dp_remove_payload_part1(NULL, NULL, NULL); + ], [drm_dp_remove_payload_part1],[drivers/gpu/drm/display/drm_dp_mst_topology.c],[ + AC_DEFINE(HAVE_DRM_DP_REMOVE_RAYLOAD_PART, 1, + [drm_dp_remove_payload_part{1,2}() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-send-real-edid-checksum.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-send-real-edid-checksum.m4 new file mode 100644 index 0000000000000..44343cb753bbe --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-send-real-edid-checksum.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit e11f5bd8228fc3760c221f940b9f6365dbf3e7ed +dnl # drm: Add support for DP 1.4 Compliance edid corruption test +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_SEND_REAL_EDID_CHECKSUM], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_dp_send_real_edid_checksum(NULL, 0); + ], [drm_dp_send_real_edid_checksum], [drivers/gpu/drm/drm_dp_helper.c drivers/gpu/drm/display/drm_dp_helper.c], [ + AC_DEFINE(HAVE_DRM_DP_SEND_REAL_EDID_CHECKSUM, 1, + [drm_dp_send_real_edid_checksum() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-driver-feature.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-driver-feature.m4 new file mode 100644 index 0000000000000..5ca66e8289bc1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-driver-feature.m4 @@ -0,0 +1,57 @@ +dnl # +dnl # commit 
0e2a933b02c972919f7478364177eb76cd4ae00d +dnl # drm: Switch DRIVER_ flags to an enum +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DRIVER_FEATURE], [ + dnl # + dnl # commit: v5.1-rc5-1467-g060cebb20cdb + dnl # drm: introduce a capability flag for syncobj timeline support + dnl # + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + int flag; + flag = DRIVER_SYNCOBJ_TIMELINE; + ],[ + AC_DEFINE(HAVE_DRM_DRV_DRIVER_SYNCOBJ_TIMELINE, 1, [ + drm_driver_feature DRIVER_SYNCOBJ_TIMELINE is available]) + ]) + ]) + + dnl # + dnl # commit: v5.0-rc1-390-g1ff494813baf + dnl # drm/irq: Ditch DRIVER_IRQ_SHARED + dnl # + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + int flag; + flag = DRIVER_IRQ_SHARED; + ],[ + AC_DEFINE(HAVE_DRM_DRV_DRIVER_IRQ_SHARED, 1, [ + drm_driver_feature DRIVER_IRQ_SHARED is available]) + ]) + ]) + + dnl # + dnl # commit: v5.2-rc5-867-g0424fdaf883a + dnl # drm/prime: Actually remove DRIVER_PRIME everywhere + dnl # + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + int flag; + flag = DRIVER_PRIME; + ],[ + AC_DEFINE(HAVE_DRM_DRV_DRIVER_PRIME, 1, [ + drm_driver_feature DRIVER_PRIME is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-open-object.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-open-object.m4 new file mode 100644 index 0000000000000..0030f68fac152 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-open-object.m4 @@ -0,0 +1,50 @@ +AC_DEFUN([AC_AMDGPU_DRM_DRIVER_GEM_OPEN_OBJECT], [ + AC_KERNEL_DO_BACKGROUND([ + dnl # + dnl # commit v5.10-rc2-329-g49a3f51dfeee + dnl # drm/gem: Use struct dma_buf_map in GEM vmap ops and convert GEM backends + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_gem_object_funcs *funcs = NULL; + funcs->vmap(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_2ARGS, 1, [drm_gem_object_funcs->vmap() has 2 args]) + ],[ + dnl # commit v5.9-rc5-1077-gd693def4fd1c + dnl # drm: Remove obsolete GEM and PRIME callbacks from struct drm_driver + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + struct drm_driver *drv = NULL; + drv->gem_open_object = NULL; + ],[ + AC_DEFINE(HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK, 1, + [drm_gem_open_object is defined in struct drm_drv]) + ]) + ]) + ]) +]) + + +dnl # +dnl # commit v5.17-rc2-157-g7938f4218168 +dnl # dma-buf-map: Rename to iosys-map +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_gem_object_funcs *funcs = NULL; + struct iosys_map *map = NULL; + funcs->vmap(NULL, map); + ], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG, 1, + [drm_gem_object_funcs.vmap hsa iosys_map arg]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-prime-res-obj.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-prime-res-obj.m4 new file mode 100644 index 0000000000000..80cba3a5a1459 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-driver-gem-prime-res-obj.m4 @@ -0,0 +1,38 @@ +dnl # +dnl # commit v5.3-rc1-325-g51c98747113e +dnl # drm/amdgpu: Fill out gem_object->resv +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DRIVER_GEM_PRIME_RES_OBJ], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_driver *drv = NULL; + drv->gem_prime_res_obj(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DRIVER_GEM_PRIME_RES_OBJ, 1, + [drm_driver->gem_prime_res_obj() is available]) + ]) + ]) +]) +dnl # +dnl # commit 
4.9-rc4-834-g85e634bce01a +dnl # drm: Extract drm_drv.h +dnl # +dnl # commit v6.4-rc2-425-g0adec22702d4 +dnl # drm: Remove struct drm_driver.gem_prime_mmap +AC_DEFUN([AC_AMDGPU_DRM_DRIVER_GEM_PRIME_MMAP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_driver *drv = NULL; + drv->gem_prime_mmap(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DRIVER_GEM_PRIME_MMAP, 1, + [drm_driver->gem_prime_mmap() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-drv-gem-prime-export.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-drv-gem-prime-export.m4 new file mode 100644 index 0000000000000..bac95ca04518c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-drv-gem-prime-export.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.2-rc5-870-ge4fa8457b219 +dnl # drm/prime: Align gem_prime_export with obj_funcs.export +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DRV_GEM_PRIME_EXPORT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_gem_prime_export(NULL, 0); + ],[ + AC_DEFINE(HAVE_DRM_DRV_GEM_PRIME_EXPORT_PI, 1, + [drm_gem_prime_export() with p,i arg is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-edid.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-edid.m4 new file mode 100644 index 0000000000000..e2a939c448834 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-edid.m4 @@ -0,0 +1,67 @@ +dnl # +dnl # 13d0add333afea7b2fef77473232b10dea3627dd +dnl # drm/edid: Pass connector to AVI infoframe functions +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EDID], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + struct hdmi_avi_infoframe *frame = NULL; + struct drm_connector *connector = NULL; + const struct drm_display_mode *mode = NULL; + drm_hdmi_avi_infoframe_from_display_mode(frame, connector, mode); + ], [drm_hdmi_avi_infoframe_from_display_mode], [drivers/gpu/drm/drm_edid.c], [ + AC_DEFINE(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_P, 1, + [drm_hdmi_avi_infoframe_from_display_mode() has p,p,p interface]) + ], [ + dnl # + dnl # 10a8512008655d5ce62f8c56323a6b5bd221c920 + dnl # drm: Add HDMI infoframe helpers + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + struct hdmi_avi_infoframe *frame = NULL; + const struct drm_display_mode *mode = NULL; + bool is_hdmi2_sink = false; + drm_hdmi_avi_infoframe_from_display_mode(frame, mode, is_hdmi2_sink); + ], [drm_hdmi_avi_infoframe_from_display_mode], [drivers/gpu/drm/drm_edid.c], [ + AC_DEFINE(HAVE_DRM_HDMI_AVI_INFOFRAME_FROM_DISPLAY_MODE_P_P_B, 1, + [drm_hdmi_avi_infoframe_from_display_mode() has p,p,b interface]) + ]) + ]) + ]) +]) + +dnl # +dnl # v6.1-rc1-143-g019b93874834 +dnl # drm/edid: rename drm_add_override_edid_modes() to drm_edid_override_connector_update() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EDID_OVERRIDE_CONNECTOR_UPDATE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + int ret; + ret = drm_edid_override_connector_update(NULL); + ],[ + AC_DEFINE(HAVE_DRM_EDID_OVERRIDE_CONNECTOR_UPDATE, 1, + [drm_edid_override_connector_update() is available]) + ],[ + dnl # + dnl # v5.2-rc2-25-g48eaeb7664c7 + dnl # drm: add fallback override/firmware EDID modes workaround + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + int ret; + ret = drm_add_override_edid_modes(NULL); + ],[ + AC_DEFINE(HAVE_DRM_ADD_OVERRIDE_EDID_MODES, 1, + [drm_add_override_edid_modes() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-exec.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-exec.m4 new file mode 100644 index 
0000000000000..b481cf6b7945e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-exec.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # commit 05d249352f1ae909230c230767ca8f4e9fdf8e7b +dnl # drm/exec: Pass in initial # of objects +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EXEC_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_exec_init(NULL, 0, 0); + ], [ + AC_DEFINE(HAVE_DRM_EXEC_INIT_3_ARGUMENTS, 1, + [drm_exec_init() has 3 arguments]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_EXEC], [ + AC_AMDGPU_DRM_EXEC_INIT +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-fb-helper-fill-info.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-fb-helper-fill-info.m4 new file mode 100644 index 0000000000000..7eddabe7c8387 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-fb-helper-fill-info.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit ec8bf1942567bf0736314da9723e93bcc73c131f +dnl # drm/fb-helper: Fixup fill_info cleanup +dnl # +AC_DEFUN([AC_AMDGPU_DRM_FB_HELPER_FILL_INFO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + drm_fb_helper_fill_info(NULL, NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_FB_HELPER_FILL_INFO, 1, + [drm_fb_helper_fill_info() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-gem-object-ref.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-gem-object-ref.m4 new file mode 100644 index 0000000000000..8f42e769f8845 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-gem-object-ref.m4 @@ -0,0 +1,42 @@ +dnl # +dnl # v5.7-rc1-518-gab15d56e27be drm: remove transient drm_gem_object_put_unlocked() +dnl # v5.7-rc1-491-geecd7fd8bf58 drm/gem: add _locked suffix to drm_gem_object_put +dnl # v5.7-rc1-490-gb5d250744ccc drm/gem: fold drm_gem_object_put_unlocked and __drm_gem_object_put() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_OBJECT_PUT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_gem_object_put(NULL); + ], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_PUT, 1, + [drm_gem_object_put() is available]) + + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_gem_object_put(NULL); + ],[drm_gem_object_put],[drivers/gpu/drm/drm_gem.c], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_PUT_SYMBOL, 1, + [drm_gem_object_put() is exported]) + ]) + ]) + ]) +]) + +dnl # +dnl # v6.8-rc3-286-gb31f5eba32ae drm: add drm_gem_object_is_shared_for_memory_stats() helper +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_gem_object_is_shared_for_memory_stats(NULL); + ], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS, 1, + [drm_gem_object_is_shared_for_memory_stats() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-hdcp-update-content-protection.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-hdcp-update-content-protection.m4 new file mode 100644 index 0000000000000..55570026acaff --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-hdcp-update-content-protection.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.3-rc1-380-gbb5a45d40d50 +dnl # drm/hdcp: update content protection property with uevent +dnl # +AC_DEFUN([AC_AMDGPU_DRM_HDCP_UPDATE_CONTENT_PROTECTION], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_hdcp_update_content_protection(NULL, 0); + ], [ + AC_DEFINE(HAVE_DRM_HDCP_UPDATE_CONTENT_PROTECTION, 1, + [drm_hdcp_update_content_protection is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-hdr-sink-metadata.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-hdr-sink-metadata.m4 new file
mode 100644 index 0000000000000..31c75e5910a4e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-hdr-sink-metadata.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # commit fbb5d0353c62d10c3699ec844d2d015a762952d7 +dnl # drm: Add HDR source metadata property +dnl # + +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_HAVE_HDR_SINK_METADATA], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector *dc = NULL; + struct hdr_sink_metadata *p = NULL; + + p = &dc->hdr_sink_metadata; + ],[ + AC_DEFINE(HAVE_HDR_SINK_METADATA, 1, + [drm_connector_hdr_sink_metadata() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-headers.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-headers.m4 new file mode 100644 index 0000000000000..6b06ae6f73fef --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-headers.m4 @@ -0,0 +1,120 @@ +AC_DEFUN([AC_AMDGPU_DRM_HEADERS], [ + dnl # + dnl # RHEL 7.x wrapper + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_backport.h]) + + dnl # + dnl # Optional devices ID for amdgpu driver + dnl # + AC_KERNEL_CHECK_HEADERS([drm/amdgpu_pciid.h]) + + dnl # + dnl # commit v5.0-rc1-342-gfcd70cd36b9b + dnl # drm: Split out drm_probe_helper.h + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_probe_helper.h]) + + dnl # + dnl # v5.4-rc1-214-g4e98f871bcff + dnl # drm: delete drmP.h + drm_os_linux.h + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drmP.h]) + + dnl # + dnl # commit v5.5-rc2-783-g368fd0aad1be + dnl # drm: Add Reusable task barrier. + dnl # + AC_KERNEL_CHECK_HEADERS([drm/task_barrier.h]) + + dnl # + dnl # v5.6-rc5-1258-gc6603c740e0e + dnl # drm: add managed resources tied to drm_device + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_managed.h]) + + dnl # + dnl # Required by AC_KERNEL_SUPPORTED_AMD_CHIPS macro + dnl # + AC_KERNEL_CHECK_HEADERS([drm/amd_asic_type.h]) + + dnl # + dnl # v5.12-rc3-330-g2916059147ea + dnl # drm/aperture: Add infrastructure for aperture ownership + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_aperture.h]) + + dnl # + dnl # v5.16-rc5-872-g5b529e8d9c38 + dnl # drm/dp: Move public DisplayPort headers into dp/ + dnl # + AC_KERNEL_CHECK_HEADERS([drm/dp/drm_dp_helper.h]) + + dnl # + dnl # v5.16-rc5-872-g5b529e8d9c38 + dnl # drm/dp: Move public DisplayPort headers into dp/ + dnl # + AC_KERNEL_CHECK_HEADERS([drm/dp/drm_dp_mst_helper.h]) + + dnl # + dnl # v5.11-rc2-620-g6dd7b6ce43ac + dnl # drm: Add additional atomic helpers for shadow-buffered planes + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_gem_atomic_helper.h]) + + dnl # + dnl # v5.18-rc2-594-gda68386d9edb + dnl # drm: Rename dp/ to display/ + dnl # + AC_KERNEL_CHECK_HEADERS([drm/display/drm_dp_helper.h]) + + dnl # + dnl # v5.18-rc2-594-gda68386d9edb + dnl # drm: Rename dp/ to display/ + dnl # + AC_KERNEL_CHECK_HEADERS([drm/display/drm_dp_mst_helper.h]) + + AC_KERNEL_CHECK_HEADERS([drm/display/drm_dsc.h]) + AC_KERNEL_CHECK_HEADERS([drm/display/drm_dsc_helper.h]) + AC_KERNEL_CHECK_HEADERS([drm/display/drm_hdmi_helper.h]) + AC_KERNEL_CHECK_HEADERS([drm/display/drm_hdcp_helper.h]) + AC_KERNEL_CHECK_HEADERS([drm/display/drm_hdcp.h]) + AC_KERNEL_CHECK_HEADERS([drm/display/drm_dp.h]) + + dnl # + dnl # v5.7-13141-gca5999fde0a1 + dnl # mm: introduce include/linux/pgtable.h + dnl # + AC_KERNEL_CHECK_HEADERS([linux/pgtable.h]) + + dnl # + dnl # v6.1-rc2-542-g8ab59da26bc0 + dnl # drm/fb-helper: Move generic fbdev emulation into separate source file + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_fbdev_generic.h]) + + dnl # + dnl # v6.2-rc6-1265-g849ee8a2f0df + dnl # drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper + dnl # 
+ AC_KERNEL_CHECK_HEADERS([drm/drm_suballoc.h]) + + dnl # + dnl # v6.4-rc7-2018-g09593216bff1 + dnl # drm: execution context for GEM buffers v7 + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_exec.h]) + + dnl # + dnl # v6.6-rc2-771-g8eb80946ab0c + dnl # drm/edid: split out drm_eld.h from drm_edid.h + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_eld.h]) + + dnl # + dnl # v6.9-rc6-1436-gaae4682e5d66 + dnl # drm/fbdev-generic: Convert to fbdev-ttm + dnl # + AC_KERNEL_CHECK_HEADERS([drm/drm_fbdev_ttm.h]) + +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-kms-helper-hotplug-event.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-kms-helper-hotplug-event.m4 new file mode 100644 index 0000000000000..2bb2ec7fade6e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-kms-helper-hotplug-event.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.15-rc2-1274-g710074bb8ab0 +dnl # drm/probe-helper: add drm_kms_helper_connector_hotplug_event +dnl # +AC_DEFUN([AC_AMDGPU_DRM_KMS_HELPER_CONNECTOR_HOTPLUG_EVENT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_kms_helper_connector_hotplug_event(NULL); + ], [ + AC_DEFINE(HAVE_DRM_KMS_HELPER_CONNECTOR_HOTPLUG_EVENT, 1, + [drm_kms_helper_connector_hotplug_event() function is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-memcpy-from-wc.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-memcpy-from-wc.m4 new file mode 100644 index 0000000000000..8ab4aaf2521dc --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-memcpy-from-wc.m4 @@ -0,0 +1,27 @@ +dnl # +dnl # commit b7e32bef4ae5f9149276203564b7911fac466588 +dnl # drm: Add a prefetching memcpy_from_wc +dnl # +AC_DEFUN([AC_AMDGPU_DRM_MEMCPY_FROM_WC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct iosys_map *dst = NULL, *src = NULL; + drm_memcpy_from_wc(dst, src, 0); + ], [ + AC_DEFINE(HAVE_DRM_MEMCPY_FROM_WC_IOSYS_MAP_ARG, 1, + [drm_memcpy_from_wc() is available and has struct iosys_map* arg]) + ], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_memcpy_from_wc(NULL, NULL, 0); + ], [ + AC_DEFINE(HAVE_DRM_MEMCPY_FROM_WC, 1, + [drm_memcpy_from_wc() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-mode-init.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-mode-init.m4 new file mode 100644 index 0000000000000..9a220e320f721 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-mode-init.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.17-rc2-403-g2d3eec897033 +dnl # drm: Add drm_mode_init() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_MODE_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_mode_init(NULL, NULL); + ], [drm_mode_init], [drivers/gpu/drm/drm_modes.c], [ + AC_DEFINE(HAVE_DRM_MODE_INIT, 1, + [drm_mode_init() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-plane-helper-funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-plane-helper-funcs.m4 new file mode 100644 index 0000000000000..e4a2a7d627810 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-plane-helper-funcs.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # commit: v6.1-rc1-27-30c637151cfa +dnl # drm/plane-helper: Export individual helpers +dnl # + +AC_DEFUN([AC_AMDGPU_DRM_PLANE_HELPER_DESTROY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_plane_helper_destroy(NULL); + ], [ + AC_DEFINE(HAVE_DRM_PLANE_HELPER_DESTROY, 1, + [drm_plane_helper_destroy() is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_PLANE_HELPER_FUNCS], [ + AC_AMDGPU_DRM_PLANE_HELPER_DESTROY +]) diff --git
a/drivers/gpu/drm/amd/dkms/m4/drm-prime-pages-to-sg.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-prime-pages-to-sg.m4 new file mode 100644 index 0000000000000..cae3e54d7c7f7 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-prime-pages-to-sg.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 707d561f77b5e2a6f90c9786bee44ee7a8dedc7e +dnl # drm: allow limiting the scatter list size. +dnl # +AC_DEFUN([AC_AMDGPU_DRM_PRIME_PAGES_TO_SG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_prime_pages_to_sg(NULL, NULL, 0); + ], [drm_prime_pages_to_sg], [drivers/gpu/drm/drm_prime.c], [ + AC_DEFINE(HAVE_DRM_PRIME_PAGES_TO_SG_3ARGS, 1, + [drm_prime_pages_to_sg() wants 3 arguments]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-show-fdinfo.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-show-fdinfo.m4 new file mode 100644 index 0000000000000..a144bfa3967d4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-show-fdinfo.m4 @@ -0,0 +1,37 @@ +dnl # +dnl # v6.4-rc1-190-g3f09a0cd4ea3:drm: Add common fdinfo helper +dnl # +AC_DEFUN([AC_AMDGPU_DRM_FILE_DRM_SHOW_FDINFO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_show_fdinfo(NULL, NULL); + ],[drm_show_fdinfo], [drivers/gpu/drm/drm_file.c], [ + AC_DEFINE(HAVE_DRM_SHOW_FDINFO, 1, [drm_show_fdinfo() is available]) + ]) + ]) +]) + +dnl # +dnl # v6.4-rc1-190-g3f09a0cd4ea3:drm: Add common fdinfo helper +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DRIVER_SHOW_FDINFO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_driver *drm_driver = NULL; + + drm_driver->show_fdinfo(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DRIVER_SHOW_FDINFO, 1, + [drm_driver->show_fdinfo() is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_SHOW_FDINFO], [ + AC_AMDGPU_DRM_FILE_DRM_SHOW_FDINFO + AC_AMDGPU_DRM_DRIVER_SHOW_FDINFO +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-up-update-payload-part1-start-slot-arg.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-up-update-payload-part1-start-slot-arg.m4 new file mode 100644 index 0000000000000..1b341003bb985 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-up-update-payload-part1-start-slot-arg.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.13-2803-gcf95d5c0c941 +dnl # drm: Update MST First Link Slot Information Based on Encoding Format +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_UPDATE_PAYLOAD_PART1_START_SLOT_ARG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_update_payload_part1(NULL, 0); + ], [ + AC_DEFINE(HAVE_DRM_DP_UPDATE_PAYLOAD_PART1_START_SLOT_ARG, 1, + [drm_dp_update_payload_part1() function has start_slot argument]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-vblank.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-vblank.m4 new file mode 100644 index 0000000000000..d2f9038b46063 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-vblank.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v6.9-rc2-247-gd12e36494dc2 +dnl # drm/vblank: Introduce drm_crtc_vblank_crtc() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CRTC_VBLANK_CRTC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_crtc_vblank_crtc(NULL); + ],[ + AC_DEFINE(HAVE_CRTC_DRM_VBLANK_CRTC, 1, + [drm_crtc_vblank_crtc() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-vma-offset-node-readonly-field.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-vma-offset-node-readonly-field.m4 new file mode 100644 index 0000000000000..06ca73cc0fe91 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-vma-offset-node-readonly-field.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit
v5.13-rc3-1382-gf425821b9468 +dnl # drm/vma: Add a driver_private member to vma_node. +dnl # +AC_DEFUN([AC_AMDGPU_DRM_VMA_OFFSET_NODE_READONLY_FIELD], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_vma_offset_node *node = NULL; + node->readonly = false; + ], [ + AC_DEFINE(HAVE_DRM_VMA_OFFSET_NODE_READONLY_FIELD, 1, [struct drm_vma_offset_node has readonly field]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_aperture_remove_conflicting_pci_framebuffers.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_aperture_remove_conflicting_pci_framebuffers.m4 new file mode 100644 index 0000000000000..f4c7be22ebded --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_aperture_remove_conflicting_pci_framebuffers.m4 @@ -0,0 +1,22 @@ +dnl # +dnl # v5.12-rc3-332-g603dc7ed917f drm/aperture: Inline fbdev conflict helpers into aperture helpers +dnl # v5.12-rc3-330-g2916059147ea drm/aperture: Add infrastructure for aperture ownership +dnl # +AC_DEFUN([AC_AMDGPU_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS], [ + AC_KERNEL_DO_BACKGROUND([ + AS_IF([test x$HAVE_DRM_DRM_APERTURE_H = x ], [ + dnl # + dnl # v5.3-rc1-540-g0a8459693238 fbdev: drop res_id parameter from remove_conflicting_pci_framebuffers + dnl # + AC_KERNEL_TRY_COMPILE([ + struct task_struct; + #include + ], [ + remove_conflicting_pci_framebuffers(NULL, NULL); + ], [ + AC_DEFINE(HAVE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_PP, 1, + [remove_conflicting_pci_framebuffers() wants p,p args]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_atomic_helper_calc_timestamping_constants.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_helper_calc_timestamping_constants.m4 new file mode 100644 index 0000000000000..7b6d31518fe3c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_helper_calc_timestamping_constants.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.9-rc5-1595-ge1ad957d45f7 +dnl # Extract drm_atomic_helper_calc_timestamping_constants() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_atomic_helper_calc_timestamping_constants(NULL); + ],[drm_atomic_helper_calc_timestamping_constants], [drivers/gpu/drm/drm_atomic_helper.c], [ + AC_DEFINE(HAVE_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS, 1, + [drm_atomic_helper_calc_timestamping_constants() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_atomic_plane_enabling.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_plane_enabling.m4 new file mode 100644 index 0000000000000..5fe466630d8dd --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_plane_enabling.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v6.2-rc6-1230-g169b9182f192 +dnl # drm/atomic-helper: Add atomic_enable plane-helper callback +dnl # +AC_DEFUN([AC_AMDGPU_DRM_ATOMIC_PLANE_ENABLING], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_atomic_plane_enabling(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_ATOMIC_PLANE_ENABLING, 1, + [drm_atomic_plane_enabling() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_atomic_private_obj_init.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_private_obj_init.m4 new file mode 100644 index 0000000000000..12b80d67386a9 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_atomic_private_obj_init.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v4.20-rc4-945-gb962a12050a3 +dnl # drm/atomic: integrate modeset lock with private objects +dnl # +AC_DEFUN([AC_AMDGPU_DRM_ATOMIC_PRIVATE_OBJ_INIT], [ + 
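+	dnl #
+	dnl # Note: per the commit cited above, the 4-argument form probed here is
+	dnl # drm_atomic_private_obj_init(dev, obj, state, funcs); the earlier
+	dnl # variant took only (obj, state, funcs), without the struct drm_device *.
+	dnl #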
AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_atomic_private_obj_init(NULL, NULL, NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_ATOMIC_PRIVATE_OBJ_INIT_4ARGS, 1, + [drm_atomic_private_obj_init() wants 4 args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_client_register.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_client_register.m4 new file mode 100644 index 0000000000000..ef060260bee20 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_client_register.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.1-rc2-1103-ge33898a20744 +dnl # drm/client: Rename drm_client_add() to drm_client_register() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CLIENT_REGISTER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_client_register(NULL); + ],[ + AC_DEFINE(HAVE_DRM_CLIENT_REGISTER, 1, + [drm_client_register() is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_connector_helper_funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_connector_helper_funcs.m4 new file mode 100644 index 0000000000000..11f356754b2c5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_connector_helper_funcs.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.1-rc1-14-g9d2230dc1351 +dnl # drm: writeback: Add job prepare and cleanup operations +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_helper_funcs *funcs = NULL; + funcs->prepare_writeback_job((struct drm_writeback_connector *)NULL, (struct drm_writeback_job *)NULL); + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB, 1, + [drm_connector_helper_funcs->prepare_writeback_job is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_debug_enabled.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_debug_enabled.m4 new file mode 100644 index 0000000000000..88be95171ea7b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_debug_enabled.m4 @@ -0,0 +1,54 @@ +dnl # +dnl # commit v5.3-rc1-708-gf0a8f533adc2 +dnl # drm/print: add drm_debug_enabled() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DEBUG_ENABLED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_debug_enabled(0); + ],[ + AC_DEFINE(HAVE_DRM_DEBUG_ENABLED, + 1, + [drm_debug_enabled() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v6.8-rc3-242-g9fd6f61a297e +dnl # drm/print: add drm_dbg_printer() for drm device specific printer +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DBG_PRINTER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_dbg_printer(NULL, 0, NULL); + ],[ + AC_DEFINE(HAVE_DRM_DBG_PRINTER, + 1, + [drm_dbg_printer() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v5.4-rc4-974-g876905b8fe59 +dnl # drm/print: convert debug category macros into an enum +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DEBUG_CATEGORY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + enum drm_debug_category category; + category = DRM_UT_CORE; + ],[ + AC_DEFINE(HAVE_DRM_DEBUG_CATEGORY, + 1, + [enum drm_debug_category is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_hpd_irq_handle_event.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_hpd_irq_handle_event.m4 new file mode 100644 index 0000000000000..a70fd97681104 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_hpd_irq_handle_event.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v6.2-4472-g55970ce50152 +dnl # drm/dp_mst: Clear MSG_RDY flag before sending new message +dnl # 
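+dnl #
+dnl # A driver-side sketch of how this probe is typically consumed (illustration
+dnl # only; the pre-v6.2 fallback drm_dp_mst_hpd_irq() and the variable names
+dnl # are assumptions, not something this patch adds):
+dnl #
+dnl #   #ifdef HAVE_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT
+dnl #   drm_dp_mst_hpd_irq_handle_event(mgr, esi, ack, &handled);
+dnl #   #else
+dnl #   drm_dp_mst_hpd_irq(mgr, esi, &handled);
+dnl #   #endif
+dnl #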
+AC_DEFUN([AC_AMDGPU_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_mst_hpd_irq_handle_event(NULL, NULL, NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT, 1, + [drm_dp_mst_hpd_irq_handle_event() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr.m4 new file mode 100644 index 0000000000000..c674432e635f2 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # commit v4.14-rc1-a4370c7774 +dnl # drm/atomic: Make private objs proper objects +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_BASE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_dp_mst_topology_mgr *mst_mgr = 0; + int i = 0; + if ((&mst_mgr->base) && (&mst_mgr->base.lock)) + i++; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_MGR_BASE, 1, + [struct drm_dp_mst_topology_mgr.base is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr_init.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr_init.m4 new file mode 100644 index 0000000000000..98d2982594b7c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dp_mst_topology_mgr_init.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v4.10-rc3-517-g7b0a89a6db9a +dnl # drm/dp: Store drm_device in MST topology manager +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dp_mst_topology_mgr_init(NULL, (struct drm_device *)NULL, NULL, 0, 0, 0, 0, 0); + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_TOPOLOGY_MGR_INIT_MAX_LANE_COUNT, 1, + [drm_dp_mst_topology_mgr_init() has max_lane_count and max_link_rate]) + + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dp_read_dpcd_caps.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dp_read_dpcd_caps.m4 new file mode 100644 index 0000000000000..8306568c2e0ca --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dp_read_dpcd_caps.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.9-rc1-294-gb9936121d95b +dnl # drm/i915/dp: Extract drm_dp_read_dpcd_caps() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_READ_DPCD_CAPS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_dp_read_dpcd_caps(NULL, NULL); + ], [drm_dp_read_dpcd_caps], [drivers/gpu/drm/display/drm_dp_helper.c drivers/gpu/drm/drm_dp_helper.c], [ + AC_DEFINE(HAVE_DRM_DP_READ_DPCD_CAPS, 1, + [drm_dp_read_dpcd_caps() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dp_subconnector.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dp_subconnector.m4 new file mode 100644 index 0000000000000..527068f2403f8 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dp_subconnector.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.8-rc2-671-ge5b92773287c drm: report dp downstream port type as a subconnector property +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_SUBCONNECTOR], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + struct drm_mode_config *mode_config = NULL; + mode_config->dp_subconnector_property = NULL; + ], [ + AC_DEFINE(HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY, 1, + [drm_mode_config->dp_subconnector_property is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dsc_compute_rc_parameters.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_compute_rc_parameters.m4 new file mode 100644 index 0000000000000..a6f72c8fe6ba9 --- /dev/null +++ 
b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_compute_rc_parameters.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.18-rc2-597-g2a64b147350f +dnl # drm/display: Move DSC header and helpers into display-helper module +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DSC_COMPUTE_RC_PARAMETERS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dsc_compute_rc_parameters(NULL); + ], [ + AC_DEFINE(HAVE_DRM_DSC_COMPUTE_RC_PARAMETERS, 1, + [drm_dsc_compute_rc_parameters() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dsc_config.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_config.m4 new file mode 100644 index 0000000000000..e10e43c6dfe6c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_config.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v4.20-rc3-804-g19fd5adbb595 +dnl # drm/dsc: Define VESA Display Stream Compression Capabilities +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DSC_CONFIG_SIMPLE_422], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_dsc_config *conf = NULL; + conf->simple_422 = true; + ], [ + AC_DEFINE(HAVE_DRM_DSC_CONFIG_SIMPLE_422, 1, + [struct drm_dsc_config has member simple_422]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_dsc_pps_payload_pack.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_pps_payload_pack.m4 new file mode 100644 index 0000000000000..b85668160976c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_dsc_pps_payload_pack.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.18-rc2-597-g2a64b147350f +dnl # drm/display: Move DSC header and helpers into display-helper module +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DSC_PPS_PAYLOAD_PACK], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_dsc_pps_payload_pack(NULL, NULL); + ], [ + AC_DEFINE(HAVE_DRM_DSC_PPS_PAYLOAD_PACK, 1, + [drm_dsc_pps_payload_pack() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_edid_alloc.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_edid_alloc.m4 new file mode 100644 index 0000000000000..86301a9a861f4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_edid_alloc.m4 @@ -0,0 +1,57 @@ +dnl # +dnl # commit v5.18-rc5-1218-g6537f79a2aae +dnl # drm/edid: add new interfaces around struct drm_edid +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EDID_MALLOC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_edid_alloc(NULL, 0); + ],[ + AC_DEFINE(HAVE_DRM_EDID_MALLOC, 1, + [drm_edid_alloc() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v5.19-rc2-380-g3d1ab66e043f +dnl # drm/edid: add drm_edid_raw() to access the raw EDID data +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EDID_RAW], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_edid_raw(NULL); + ],[ + AC_DEFINE(HAVE_DRM_EDID_RAW, 1, + [drm_edid_raw() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v6.1-rc1-145-g6c9b3db70aad +dnl # drm/edid: add function for checking drm_edid validity +dnl # +AC_DEFUN([AC_AMDGPU_DRM_EDID_VALID], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_edid_valid(NULL); + ],[ + AC_DEFINE(HAVE_DRM_EDID_VALID, 1, + [drm_edid_valid() is available]) + ]) + ]) +]) + + +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_EDID], [ + AC_AMDGPU_DRM_EDID_MALLOC + AC_AMDGPU_DRM_EDID_RAW + AC_AMDGPU_DRM_EDID_VALID +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_fb_helper_init.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_fb_helper_init.m4 new file mode 100644 index 0000000000000..5bbcaa354dfde --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_fb_helper_init.m4 @@ -0,0 +1,31 @@ +dnl # 
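+dnl # The nested probes below define at most one of the two HAVE_ flags; a
+dnl # consumer-side sketch (variable names are hypothetical, not from this patch):
+dnl #
+dnl #   #if defined(HAVE_DRM_FB_HELPER_INIT_2ARGS)
+dnl #   ret = drm_fb_helper_init(dev, fb_helper);
+dnl #   #elif defined(HAVE_DRM_FB_HELPER_INIT_3ARGS)
+dnl #   ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
+dnl #   #endif
+dnl #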
+dnl # commit v5.6-rc2-1021-g2dea2d118217 +dnl # drm: Remove unused arg from drm_fb_helper_init +dnl # +AC_DEFUN([AC_AMDGPU_DRM_FB_HELPER_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + #include + ], [ + drm_fb_helper_init(NULL, NULL); + ], [drm_fb_helper_init], [drivers/gpu/drm/drm_fb_helper.c], [ + AC_DEFINE(HAVE_DRM_FB_HELPER_INIT_2ARGS, 1, + [drm_fb_helper_init() has 2 args]) + ], [ + dnl # + dnl # commit v4.10-rc5-1046-ge4563f6ba717 + dnl # drm: Rely on mode_config data for fb_helper initialization + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + #include + ], [ + drm_fb_helper_init(NULL, NULL, 0); + ], [drm_fb_helper_init], [drivers/gpu/drm/drm_fb_helper.c], [ + AC_DEFINE(HAVE_DRM_FB_HELPER_INIT_3ARGS, 1, + [drm_fb_helper_init() has 3 args]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_firmware_drivers_only.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_firmware_drivers_only.m4 new file mode 100644 index 0000000000000..3d95cd4406fe2 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_firmware_drivers_only.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.16-rc1-268-g6a2d2ddf2c34 +dnl # drm: Move nomodeset kernel parameter to the DRM subsystem +dnl # +AC_DEFUN([AC_AMDGPU_DRM_FIRMWARE_DRIVERS_ONLY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_firmware_drivers_only(); + ], [ + AC_DEFINE(HAVE_DRM_FIRMWARE_DRIVERS_ONLY, 1, + [drm_firmware_drivers_only() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_format_info.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_format_info.m4 new file mode 100644 index 0000000000000..54d06ba68400f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_format_info.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # v5.9-rc5-1367-g564b9f4c7cf9 +dnl # drm/amd/display: Add formats for DCC with 2/3 planes +dnl # +AC_DEFUN([AC_AMDGPU_DRM_FORMAT_INFO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_format_info format; + + format.format = DRM_FORMAT_XRGB16161616F; + format.block_w[0] = 0; + format.block_h[0] = 0; + ], [ + AC_DEFINE(HAVE_DRM_FORMAT_INFO_MODIFIER_SUPPORTED, 1, + [drm_format_info.block_w and drm_format_info.block_h are available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_gem_plane_helper_prepare_fb.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_gem_plane_helper_prepare_fb.m4 new file mode 100644 index 0000000000000..e0311f6ebd145 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_gem_plane_helper_prepare_fb.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit 96d4f267e40f9509e8a66e2b39e8b95655617693 +dnl # Author: Linus Torvalds +dnl # Date: Thu Jan 3 18:57:57 2019 -0800 +dnl # Remove 'type' argument from access_ok() function +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_PLANE_HELPER_PREPARE_FB], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + drm_gem_plane_helper_prepare_fb(NULL, NULL); + ],[ + AC_DEFINE(HAVE_DRM_GEM_PLANE_HELPER_PREPARE_FB, 1, + [drm_gem_plane_helper_prepare_fb() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_gem_prime_handle_to_dmabuf.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_gem_prime_handle_to_dmabuf.m4 new file mode 100644 index 0000000000000..cfdf6da657222 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_gem_prime_handle_to_dmabuf.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v6.10-3140-ge9b641807e5e +dnl # drm: new helper: drm_gem_prime_handle_to_dmabuf() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_PRIME_HANDLE_TO_DMABUF], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include +
],[ + drm_gem_prime_handle_to_dmabuf(NULL, NULL, 0, 0); + ],[drm_gem_prime_handle_to_dmabuf],[drivers/gpu/drm/drm_prime.c],[ + AC_DEFINE(HAVE_DRM_GEM_PRIME_HANDLE_TO_DMABUF, 1, + [drm_gem_prime_handle_to_dmabuf() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_helper_force_disable_all.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_helper_force_disable_all.m4 new file mode 100644 index 0000000000000..69513e0cacc39 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_helper_force_disable_all.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # commit f453ba0460742ad027ae0c4c7d61e62817b3e7ef +dnl # DRM: add mode setting support +dnl # +dnl # commit v5.0-rc1-118-gc2d88e06bcb9 +dnl # drm: Move the legacy kms disable_all helper to crtc helpers +dnl # +AC_DEFUN([AC_AMDGPU_DRM_HELPER_FORCE_DISABLE_ALL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_helper_force_disable_all(NULL); + ],[drm_helper_force_disable_all], [drivers/gpu/drm/drm_crtc_helper.c],[ + AC_DEFINE(HAVE_DRM_HELPER_FORCE_DISABLE_ALL, 1, + [drm_helper_force_disable_all() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_mode_config.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_mode_config.m4 new file mode 100644 index 0000000000000..4b809aec8cd50 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_mode_config.m4 @@ -0,0 +1,36 @@ +AC_DEFUN([AC_AMDGPU_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_mode_config *mode_config = NULL; + mode_config->fb_modifiers_not_supported = true; + ], [ + AC_DEFINE(HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED, 1, + [drm_mode_config->fb_modifiers_not_supported is available]) + ]) + ]) +]) + +dnl # +dnl # v6.1-rc1-103-g7c99616e3fe7 drm: Remove drm_mode_config::fb_base +dnl # +AC_DEFUN([AC_AMDGPU_DRM_MODE_CONFIG_FB_BASE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_mode_config *mode_config = NULL; + mode_config->fb_base = 0; + ], [ + AC_DEFINE(HAVE_DRM_MODE_CONFIG_FB_BASE, 1, + [drm_mode_config->fb_base is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_DRM_MODE_CONFIG], [ + AC_AMDGPU_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED + AC_AMDGPU_DRM_MODE_CONFIG_FB_BASE +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_mode_create_hdmi_colorspace_property.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_mode_create_hdmi_colorspace_property.m4 new file mode 100644 index 0000000000000..ecc33db72dcbd --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_mode_create_hdmi_colorspace_property.m4 @@ -0,0 +1,53 @@ +dnl # +dnl # commit v5.3-rc1-675-g8806cd3aa025 +dnl # drm: Rename HDMI colorspace property creation function +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_connector_attach_colorspace_property(NULL); + ],[drm_connector_attach_colorspace_property], [drivers/gpu/drm/drm_connector.c], [ + AC_DEFINE(HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY, 1, + [drm_connector_attach_colorspace_property() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v6.1-5783-g08383039cd19 +dnl # drm/connector: Allow drivers to pass list of supported colorspaces +dnl # +AC_DEFUN([AC_AMDGPU_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY], [ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_mode_create_hdmi_colorspace_property(NULL, 0); + ], [drm_mode_create_hdmi_colorspace_property], [drivers/gpu/drm/drm_connector.c], [ + 
AC_DEFINE(HAVE_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY_2ARGS, 1, + [drm_mode_create_hdmi_colorspace_property() has 2 args]) + ]) +]) + +dnl # +dnl # commit v6.1-5783-g08383039cd19 +dnl # drm/connector: Allow drivers to pass list of supported colorspaces +dnl # +AC_DEFUN([AC_AMDGPU_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY], [ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_mode_create_dp_colorspace_property(NULL, 0); + ], [drm_mode_create_dp_colorspace_property], [drivers/gpu/drm/drm_connector.c], [ + AC_DEFINE(HAVE_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_2ARGS, 1, + [drm_mode_create_dp_colorspace_property() has 2 args]) + ]) +]) + + +AC_DEFUN([AC_AMDGPU_DRM_MODE_CREATE_COLORSPACE_PROPERTY_FUNCS], [ + AC_AMDGPU_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY + AC_AMDGPU_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY + AC_AMDGPU_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_prime.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_prime.m4 new file mode 100644 index 0000000000000..5d14a0d6b877a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_prime.m4 @@ -0,0 +1,36 @@ +dnl # +dnl # commit v5.10-rc3-1140-gc67e62790f5c +dnl # drm/prime: split array import functions v4 +dnl # +AC_DEFUN([AC_AMDGPU_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_prime_sg_to_dma_addr_array(NULL, NULL, 0); + ],[drm_prime_sg_to_dma_addr_array], [drivers/gpu/drm/drm_prime.c], [ + AC_DEFINE(HAVE_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY, 1, + [drm_prime_sg_to_dma_addr_array() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v3.3-9296-g3248877ea179 +dnl # drm: base prime/dma-buf support (v5) +dnl # +dnl # commit v6.4-rc7-1904-g71a7974ac701 +dnl # drm/prime: Unexport helpers for fd/handle conversion +dnl # +AC_DEFUN([AC_AMDGPU_DRM_GEM_PRIME_HANDLE_TO_FD], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_gem_prime_handle_to_fd(NULL, NULL, 0, 0, NULL); + ],[drm_gem_prime_handle_to_fd], [drivers/gpu/drm/drm_prime.c], [ + AC_DEFINE(HAVE_DRM_GEM_PRIME_HANDLE_TO_FD, 1, + [drm_gem_prime_handle_to_fd() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_print_bits.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_print_bits.m4 new file mode 100644 index 0000000000000..fb7266321075c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_print_bits.m4 @@ -0,0 +1,29 @@ +dnl # +dnl # v5.3-rc1-684-g141f6357f45c +dnl # drm: tweak drm_print_bits() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_PRINT_BITS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_print_bits(NULL, 0, NULL, 0); + ], [ + AC_DEFINE(HAVE_DRM_PRINT_BITS_4ARGS, 1, + [drm_print_bits() has 4 args]) + AC_DEFINE(HAVE_DRM_PRINT_BITS, 1, + [drm_print_bits() is available]) + ], [ + dnl # v5.3-rc1-622-g2dc5d44ccc5e + dnl # drm: add drm_print_bits + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_print_bits(NULL, 0, NULL, 0, 0); + ],[drm_print_bits], [drivers/gpu/drm/drm_print.c], [ + AC_DEFINE(HAVE_DRM_PRINT_BITS, 1, + [drm_print_bits() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_simple_kms_helper.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_simple_kms_helper.m4 new file mode 100644 index 0000000000000..837e690cc9c32 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_simple_kms_helper.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.6-rc2-359-g63170ac6f2e8 +dnl # drm/simple-kms: Add drm_simple_encoder_{init,create}() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_SIMPLE_ENCODER_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + 
AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_simple_encoder_init(NULL, NULL, 0); + ],[drm_simple_encoder_init], [drivers/gpu/drm/drm_simple_kms_helper.c],[ + AC_DEFINE(HAVE_DRM_SIMPLE_ENCODER_INIT, 1, + [drm_simple_encoder is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_suballoc.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_suballoc.m4 new file mode 100644 index 0000000000000..bcb026ad2c36d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_suballoc.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v6.2-rc6-1265-g849ee8a2f0df +dnl # drm/suballoc: Extract amdgpu_sa.c as generic suballocation helper +dnl # +AC_DEFUN([AC_AMDGPU_DRM_SUBALLOC_MANAGER_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + drm_suballoc_manager_init(NULL, 0, 0); + ], [ + AC_DEFINE(HAVE_DRM_SUBALLOC_MANAGER_INIT, 1, + [Has function drm_suballoc_manager_init()]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_vblank_crtc_config .m4 b/drivers/gpu/drm/amd/dkms/m4/drm_vblank_crtc_config .m4 new file mode 100644 index 0000000000000..90ef0ba2c3cc5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_vblank_crtc_config .m4 @@ -0,0 +1,19 @@ +dnl # +dnl # v5.11-20-g2d24dd5798d0 +dnl # rbtree: Add generic add and find helpers +dnl # +AC_DEFUN([AC_AMDGPU_DRM_VBLANK_CRTC_CONFIG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_vblank_crtc_config config; + ],[ + AC_DEFINE(HAVE_DRM_VBLANK_CRTC_CONFIG, 1, + [drm_vblank_crtc_config is available]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/drm_writeback_connector_init.m4 b/drivers/gpu/drm/amd/dkms/m4/drm_writeback_connector_init.m4 new file mode 100644 index 0000000000000..7f2c208c36f96 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm_writeback_connector_init.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.18-rc2-67-g57b8280a0a41 +dnl # drm: allow passing possible_crtcs to drm_writeback_connector_init() +dnl # +AC_DEFUN([AC_AMDGPU_DRM_WRITEBACK_CONNECTOR_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + drm_writeback_connector_init(NULL, NULL, NULL, NULL, NULL, 0, 0); + ],[drm_writeback_connector_init], [drivers/gpu/drm/drm_writeback.c],[ + AC_DEFINE(HAVE_DRM_WRITEBACK_CONNECTOR_INIT_7_ARGS, 1, + [drm_writeback_connector_init() has 7 args]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/fault_flag_allow_retry_first.m4 b/drivers/gpu/drm/amd/dkms/m4/fault_flag_allow_retry_first.m4 new file mode 100644 index 0000000000000..ce4b655254f91 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/fault_flag_allow_retry_first.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v5.6-5709-g4064b9827063 +dnl # mm: allow VM_FAULT_RETRY for multiple times +dnl # +AC_DEFUN([AC_AMDGPU_FAULT_FLAG_ALLOW_RETRY_FIRST], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + fault_flag_allow_retry_first(0); + ], [ + AC_DEFINE(HAVE_FAULT_FLAG_ALLOW_RETRY_FIRST, 1, + [fault_flag_allow_retry_first() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/follow_pfn.m4 b/drivers/gpu/drm/amd/dkms/m4/follow_pfn.m4 new file mode 100644 index 0000000000000..ea2c47c00ab90 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/follow_pfn.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v6.9-rc4-152-gcb10c28ac82c +dnl # mm: remove follow_pfn +dnl # +AC_DEFUN([AC_AMDGPU_FOLLOW_PFN], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + follow_pfn(NULL, 0, NULL); + ],[follow_pfn], [mm/memory.c],[ + 
AC_DEFINE(HAVE_FOLLOW_PFN, 1, + [follow_pfn() is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/fs_reclaim_acquire.m4 b/drivers/gpu/drm/amd/dkms/m4/fs_reclaim_acquire.m4 new file mode 100644 index 0000000000000..be2f5c9928ae2 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/fs_reclaim_acquire.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # v4.13-rc4-164-gd92a8cfcb37e +dnl # locking/lockdep: Rework FS_RECLAIM annotation +dnl # +AC_DEFUN([AC_AMDGPU_FS_RECLAIM_ACQUIRE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + fs_reclaim_acquire(0); + ],[ + AC_DEFINE(HAVE_FS_RECLAIM_ACQUIRE, 1, [fs_reclaim_acquire() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/fsleep.m4 b/drivers/gpu/drm/amd/dkms/m4/fsleep.m4 new file mode 100644 index 0000000000000..782402a16e6a4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/fsleep.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.7-rc2-1263-gc6af13d33475 +dnl # timer: add fsleep for flexible sleeping +dnl # +AC_DEFUN([AC_AMDGPU_FSLEEP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + unsigned long usecs = 0; + fsleep(usecs); + ], [ + AC_DEFINE(HAVE_FSLEEP, 1, + [fsleep() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/generic_handle_domain_irq.m4 b/drivers/gpu/drm/amd/dkms/m4/generic_handle_domain_irq.m4 new file mode 100644 index 0000000000000..01d095dd14cb8 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/generic_handle_domain_irq.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.13-rc4-24-g8240ef50d486 +dnl # genirq: Add generic_handle_domain_irq() helper +dnl # +AC_DEFUN([AC_AMDGPU_GENERIC_HANDLE_DOMAIN_IRQ], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + generic_handle_domain_irq(NULL, 0); + ], [ + AC_DEFINE(HAVE_GENERIC_HANDLE_DOMAIN_IRQ, 1, + [generic_handle_domain_irq() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/get-user-pages-remote.m4 b/drivers/gpu/drm/amd/dkms/m4/get-user-pages-remote.m4 new file mode 100644 index 0000000000000..d538ceb02d6b2 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/get-user-pages-remote.m4 @@ -0,0 +1,60 @@ +AC_DEFUN([AC_AMDGPU_GET_USER_PAGES_REMOTE], [ + AC_KERNEL_DO_BACKGROUND([ + dnl # + dnl # v5.8-12463-g64019a2e467a + dnl # mm/gup: remove task_struct pointer for all gup code + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages_remote(NULL, 0, 0, 0, NULL, NULL, NULL); + ], [get_user_pages_remote],[mm/gup.c],[ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOTE_REMOVE_TASK_STRUCT, 1, + [get_user_pages_remote() remove task_struct pointer]) + ], [ + dnl # + dnl # commit v4.9-7744-g5b56d49fc31d + dnl # mm: add locked parameter to get_user_pages_remote() + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages_remote(NULL, NULL, 0, 0, 0, NULL, NULL, NULL); + ], [get_user_pages_remote],[mm/gup.c],[ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOTE_LOCKED, 1, + [get_user_pages_remote() wants locked parameter]) + ],[ + dnl # + dnl # commit v4.8-14096-g9beae1ea8930 + dnl # mm: replace get_user_pages_remote() write/force parameters + dnl # with gup_flags + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages_remote(NULL, NULL, 0, 0, 0, NULL, NULL); + ], [get_user_pages_remote],[mm/gup.c],[ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOTE_GUP_FLAGS, 1, + [get_user_pages_remote() wants gup_flags parameter]) + ],[ + dnl # + dnl # commit v6.4-rc4-55-gca5e863233e8 + dnl # mm/gup: remove vmas parameter from get_user_pages_remote() + dnl # + 
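+		dnl # For reference, the four signatures distinguished below, in the
+		dnl # order they are tried (argument lists as understood from the
+		dnl # commits cited in this file; a summary, not authoritative):
+		dnl #   v5.8+:  (mm, start, nr_pages, gup_flags, pages, vmas, locked)
+		dnl #   v4.10+: (tsk, mm, start, nr_pages, gup_flags, pages, vmas, locked)
+		dnl #   v4.9:   (tsk, mm, start, nr_pages, gup_flags, pages, vmas)
+		dnl #   v6.4+:  (mm, start, nr_pages, gup_flags, pages, locked)
+		dnl #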
AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages_remote(NULL, 0, 0, 0, NULL, NULL); + ], [get_user_pages_remote],[mm/gup.c],[ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOTE_REMOVE_VMAS, 1, + [get_user_pages_remote() remove argument vmas]) + ],[ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOTE_INTRODUCED, 1, + [get_user_pages_remote() is introduced with initial prototype]) + ]) + ]) + ]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/get-user-pages.m4 b/drivers/gpu/drm/amd/dkms/m4/get-user-pages.m4 new file mode 100644 index 0000000000000..8042f69a0228e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/get-user-pages.m4 @@ -0,0 +1,43 @@ +dnl # +dnl # commit v4.8-14095-g768ae309a961 +dnl # mm: replace get_user_pages() write/force parameters with gup_flags +dnl # +AC_DEFUN([AC_AMDGPU_GET_USER_PAGES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages(0, 0, 0, NULL, NULL); + ], [get_user_pages], [mm/gup.c], [ + AC_DEFINE(HAVE_GET_USER_PAGES_GUP_FLAGS, 1, + [get_user_pages() wants gup_flags parameter]) + ], [ + dnl # + dnl # commit v4.6-rc2-1-gc12d2da56d0e + dnl # mm/gup: Remove the macro overload API migration helpers + dnl # from the get_user*() APIs + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages(0, 0, 0, 0, NULL, NULL); + ], [get_user_pages], [mm/gup.c], [ + AC_DEFINE(HAVE_GET_USER_PAGES_6ARGS, 1, + [get_user_pages() wants 6 args]) + ],[ + dnl # + dnl # commit v6.4-rc4-53-g54d020692b34 + dnl # mm/gup: remove unused vmas parameter from get_user_pages() + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + get_user_pages(0, 0, 0, NULL); + ], [get_user_pages], [mm/gup.c], [ + AC_DEFINE(HAVE_GET_USER_PAGES_REMOVE_VMAS, 1, + [get_user_pages() remove vmas argument]) + ]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/highmem-internal.m4 b/drivers/gpu/drm/amd/dkms/m4/highmem-internal.m4 new file mode 100644 index 0000000000000..4527ccd0a9659 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/highmem-internal.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit f3ba3c710ac5a30cd058615a9eb62d2ad95bb782 +dnl # mm/highmem: Provide kmap_local* +dnl # +AC_DEFUN([AC_AMDGPU_KMAP_LOCAL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pgprot_t prot = {0}; + kmap_local_page_prot(NULL, prot); + ], [ + AC_DEFINE(HAVE_KMAP_LOCAL, 1, [kmap_local_* is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/hmm.m4 b/drivers/gpu/drm/amd/dkms/m4/hmm.m4 new file mode 100644 index 0000000000000..72a1a8260873a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/hmm.m4 @@ -0,0 +1,76 @@ +dnl # +dnl # v5.7-rc4-4-g2733ea144dcc mm/hmm: remove the customizable pfn format from hmm_range_fault +dnl # v5.7-rc4-3-g5c8f3c4cf18a mm/hmm: remove HMM_PFN_SPECIAL +dnl # v5.7-rc4-2-g4e2490843d55 drm/amdgpu: remove dead code after hmm_range_fault() +dnl # v5.7-rc4-1-gbe957c886d92 mm/hmm: make hmm_range_fault return 0 or -1 +dnl # +AC_DEFUN([AC_AMDGPU_HMM_RANGE_FAULT], [ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + enum hmm_pfn_flags flag; + flag = HMM_PFN_REQ_FAULT; + ], [ + AC_DEFINE(HAVE_HMM_DROP_CUSTOMIZABLE_PFN_FORMAT, 1, + [hmm remove the customizable pfn format]) + AC_DEFINE(HAVE_HMM_RANGE_FAULT_1ARG, 1, + [hmm_range_fault() wants 1 arg]) + ], [ + dnl # + dnl # v5.6-rc3-21-g6bfef2f91945 + dnl # mm/hmm: remove HMM_FAULT_SNAPSHOT + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + hmm_range_fault(NULL); + ], [ + AC_DEFINE(HAVE_HMM_RANGE_FAULT_1ARG, 1, + [hmm_range_fault() wants 1 arg]) + ]) + ]) +]) + +dnl #
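+dnl # With the one-argument interface probed above, callers typically retry
+dnl # until the range is valid; a minimal sketch (assuming a populated
+dnl # struct hmm_range *range with an interval notifier; names such as mm
+dnl # and ret are hypothetical):
+dnl #
+dnl #   do {
+dnl #           range->notifier_seq = mmu_interval_read_begin(range->notifier);
+dnl #           mmap_read_lock(mm);
+dnl #           ret = hmm_range_fault(range);
+dnl #           mmap_read_unlock(mm);
+dnl #   } while (ret == -EBUSY);
+dnl #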
+dnl # v5.1-10231-gbf198b2b34bf: mm/mmu_notifier: pass down vma and reasons why mmu notifier is happening +dnl # 93f4e735b6d9 - mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap 2019-11-23 19:56:45 -0400 +dnl # d28c2c9a4877 - mm/hmm: make full use of walk_page_range() 2019-11-23 19:56:45 -0400 +dnl # d3eeb1d77c5d - xen/gntdev: use mmu_interval_notifier_insert 2019-11-23 19:56:45 -0400 +dnl # a22dd506400d - mm/hmm: remove hmm_mirror and related 2019-11-23 19:56:45 -0400 +dnl # 81fa1af31b5d - drm/amdgpu: Use mmu_interval_notifier instead of hmm_mirror 2019-11-23 19:56:45 -0400 +dnl # 62914a99dee5 - drm/amdgpu: Use mmu_interval_insert instead of hmm_mirror 2019-11-23 19:56:45 -0400 +dnl # a9ae8731e6e5 - drm/amdgpu: Call find_vma under mmap_sem 2019-11-23 19:56:44 -0400 +dnl # 20fef4ef84bf - nouveau: use mmu_interval_notifier instead of hmm_mirror 2019-11-23 19:56:44 -0400 +dnl # c625c274ee00 - nouveau: use mmu_notifier directly for invalidate_range_start 2019-11-23 19:56:44 -0400 +dnl # 3506ff69c3ec - drm/radeon: use mmu_interval_notifier_insert 2019-11-23 19:56:44 -0400 +dnl # 3889551db212 - RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv 2019-11-23 19:56:44 -0400 +dnl # f25a546e6529 - RDMA/odp: Use mmu_interval_notifier_insert() 2019-11-23 19:56:44 -0400 +dnl # 107e899874e9 - mm/hmm: define the pre-processor related parts of hmm.h even if disabled 2019-11-23 19:56:44 -0400 +dnl # v5.4-rc5-20-g04ec32fbc2b2 - mm/hmm: allow hmm_range to be used with a mmu_interval_notifier or hmm_mirror 2019-11-23 19:56:44 -0400 +dnl # 99cb252f5e68 - mm/mmu_notifier: add an interval tree notifier 2019-11-23 19:56:44 -0400 +dnl # 56f434f40f05 - mm/mmu_notifier: define the header pre-processor parts even if disabled 2019-11-12 20:18:27 -0400 +dnl # 7d4a8be0c4b2 - mm/mmu_notifier: remove unused mmu_notifier_range_update_to_read_only export +dnl # +AC_DEFUN([AC_AMDGPU_HMM], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + #ifdef CONFIG_HMM_MIRROR + struct hmm_range *range = NULL; + struct mmu_notifier_range *mmu_range = NULL; + + range->notifier = NULL; + mmu_range->event = 0; + #else + #error CONFIG_HMM_MIRROR not enabled + #endif + ], [ + AC_DEFINE(HAVE_AMDKCL_HMM_MIRROR_ENABLED, 1, + [hmm support is enabled]) + AC_AMDGPU_HMM_RANGE_FAULT + AC_AMDGPU_HSA_AMD_SVM + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/hypervisor_is_type.m4 b/drivers/gpu/drm/amd/dkms/m4/hypervisor_is_type.m4 new file mode 100644 index 0000000000000..2d2702416ffd7 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/hypervisor_is_type.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit: 79cc74155218316b9a5d28577c7077b2adba8e58 +dnl # x86/paravirt: Provide a way to check for hypervisors +dnl # +AC_DEFUN([AC_AMDGPU_HYPERVISOR_IS_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + hypervisor_is_type(X86_HYPER_NATIVE); + ], [ + AC_DEFINE(HAVE_HYPERVISOR_IS_TYPE, 1, + [hypervisor_is_type() is available]) + ], [ + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/i2c-lock-operations-struct.m4 b/drivers/gpu/drm/amd/dkms/m4/i2c-lock-operations-struct.m4 new file mode 100644 index 0000000000000..c8655f6c15d91 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/i2c-lock-operations-struct.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit d1ed7985b9a6b85ea38a330108c51ec83381c01b +dnl # Author: Peter Rosin +dnl # Date: Thu Aug 25 23:07:01 2016 +0200 +dnl # i2c: move locking operations to their own structure +dnl # 
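+dnl #
+dnl # Where the structure exists, an adapter wires up its locking callbacks
+dnl # roughly as follows (sketch only; the my_* names are hypothetical):
+dnl #
+dnl #   static const struct i2c_lock_operations my_lock_ops = {
+dnl #           .lock_bus    = my_lock_bus,
+dnl #           .trylock_bus = my_trylock_bus,
+dnl #           .unlock_bus  = my_unlock_bus,
+dnl #   };
+dnl #   adapter->lock_ops = &my_lock_ops;
+dnl #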
+AC_DEFUN([AC_AMDGPU_I2C_LOCK_OPERATIONS_STRUCT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct i2c_lock_operations drm_dp_i2c_lock_ops; + drm_dp_i2c_lock_ops.lock_bus = NULL; + ], [ + AC_DEFINE(HAVE_I2C_LOCK_OPERATIONS_STRUCT, 1, + [struct i2c_lock_operations is defined]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/i2c_new_client_device.m4 b/drivers/gpu/drm/amd/dkms/m4/i2c_new_client_device.m4 new file mode 100644 index 0000000000000..a4d3e37bfa38c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/i2c_new_client_device.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.1-12318-g7159dbdae3c5 +dnl # i2c: core: improve return value handling of i2c_new_device and i2c_new_dummy +dnl # +AC_DEFUN([AC_AMDGPU_I2C_NEW_CLIENT_DEVICE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + i2c_new_client_device(NULL, NULL); + ],[i2c_new_client_device], [drivers/i2c/i2c-core-base.c],[ + AC_DEFINE(HAVE_I2C_NEW_CLIENT_DEVICE, 1, + [i2c_new_client_device() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/idr.m4 b/drivers/gpu/drm/amd/dkms/m4/idr.m4 new file mode 100644 index 0000000000000..1c678e3c401b7 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/idr.m4 @@ -0,0 +1,61 @@ +dnl # +dnl # commit d3e709e63e97e5f3f129b639991cfe266da60bae +dnl # Author: Matthew Wilcox +dnl # Date: Thu Dec 22 13:30:22 2016 -0500 +dnl # idr: Return the deleted entry from idr_remove +dnl # +AC_DEFUN([AC_AMDGPU_IDR_REMOVE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + void *i; + i = idr_remove(NULL, 0); + ], [ + AC_DEFINE(HAVE_IDR_REMOVE_RETURN_VOID_POINTER, 1, + [idr_remove return void pointer]) + ]) + ]) +]) + +dnl # +dnl # commit v6.1-rc1~27-c4f306e31632 +dnl # drm/amdgpu: use idr_init_base() to initialize fpriv->bo_list_handles +dnl # +AC_DEFUN([AC_AMDGPU_IDR_INIT_BASE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + idr_init_base(NULL, 0); + ], [ + AC_DEFINE(HAVE_IDR_INIT_BASE, 1, + [idr_init_base() is available]) + ]) + ]) +]) + +dnl # +dnl # commit v4.16-rc1~25-6ce711f27500 +dnl # idr: Make 1-based IDRs more efficient +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_IDE_IDR_BASE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct idr *idr = NULL; + idr->idr_base = 0; + ], [ + AC_DEFINE(HAVE_STRUCT_IDE_IDR_BASE, 1, + [idr->idr_base is available]) + ]) + ]) +]) + + +AC_DEFUN([AC_AMDGPU_IDR], [ + AC_AMDGPU_IDR_REMOVE + AC_AMDGPU_IDR_INIT_BASE + AC_AMDGPU_STRUCT_IDE_IDR_BASE +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/import_guid.m4 b/drivers/gpu/drm/amd/dkms/m4/import_guid.m4 new file mode 100644 index 0000000000000..c96b4703e6cf9 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/import_guid.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # v5.6-rc7-127-gd01cd62400b3 +dnl # uuid: Add inline helpers to import / export UUIDs +dnl # +AC_DEFUN([AC_AMDGPU_IMPORT_GUID], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + import_guid(NULL, NULL); + ],[ + AC_DEFINE(HAVE_IMPORT_GUID, 1, + [import_guid() is available]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/in-compat-syscall.m4 b/drivers/gpu/drm/amd/dkms/m4/in-compat-syscall.m4 new file mode 100644 index 0000000000000..45f413011ba01 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/in-compat-syscall.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v4.5-11126-g5180e3e24fd3 +dnl # compat: add in_compat_syscall to ask whether we're in a compat syscall +dnl # +AC_DEFUN([AC_AMDGPU_IN_COMPAT_SYSCALL], [ +
AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + in_compat_syscall(); + ],[ + AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, + [in_compat_syscall is defined]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/interval-tree-insert.m4 b/drivers/gpu/drm/amd/dkms/m4/interval-tree-insert.m4 new file mode 100644 index 0000000000000..52baac13d3460 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/interval-tree-insert.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit f808c13fd3738948e10196496959871130612b61 +dnl # lib/interval_tree: fast overlap detection +dnl # +AC_DEFUN([AC_AMDGPU_INTERVAL_TREE_INSERT_HAVE_RB_ROOT_CACHED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct rb_root_cached *r = NULL; + interval_tree_insert(NULL, r); + ],[ + AC_DEFINE(HAVE_TREE_INSERT_HAVE_RB_ROOT_CACHED, 1, + [interval_tree_insert have struct rb_root_cached]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/invalidate-range-start.m4 b/drivers/gpu/drm/amd/dkms/m4/invalidate-range-start.m4 new file mode 100644 index 0000000000000..d9edaefbebdcf --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/invalidate-range-start.m4 @@ -0,0 +1,36 @@ +dnl # +dnl # commit 5d6527a784f7a6d247961e046e830de8d71b47d1 +dnl # Author: Jérôme Glisse +dnl # Date: Fri Dec 28 00:38:05 2018 -0800 +dnl # mm/mmu_notifier: use structure for invalidate_range_start/end callback +dnl # Patch series "mmu notifier contextual informations", v2. +dnl # +AC_DEFUN([AC_AMDGPU_INVALIDATE_RANGE_START], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct mmu_notifier_ops *ops = NULL; + ops->invalidate_range_start(NULL, NULL); + ], [ + AC_DEFINE(HAVE_2ARGS_INVALIDATE_RANGE_START, 1, + whether invalidate_range_start() wants 2 args) + ], [ + dnl # + dnl # commit 93065ac753e4443840a057bfef4be71ec766fde9 + dnl # Author: Michal Hocko + dnl # Date: Tue Aug 21 21:52:33 2018 -0700 + dnl # mm, oom: distinguish blockable mode for mmu notifiers + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct mmu_notifier_ops *ops = NULL; + ops->invalidate_range_start(NULL, NULL, 1, 1, 1); + ], [ + AC_DEFINE(HAVE_5ARGS_INVALIDATE_RANGE_START, 1, + whether invalidate_range_start() wants 5 args) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/io-mapping-map-local-wc.m4 b/drivers/gpu/drm/amd/dkms/m4/io-mapping-map-local-wc.m4 new file mode 100644 index 0000000000000..72ba661169b58 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/io-mapping-map-local-wc.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # commit e66f6e095486f0210fcf3c5eb3ecf13fa348be4c +dnl # io-mapping: Provide iomap_local variant +dnl # +AC_DEFUN([AC_AMDGPU_IO_MAPPING_MAP_LOCAL_WC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + io_mapping_map_local_wc(NULL, 0); + ], [ + AC_DEFINE(HAVE_IO_MAPPING_MAP_LOCAL_WC, 1, [io_mapping_map_local_wc() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/io-mapping-unmap-local.m4 b/drivers/gpu/drm/amd/dkms/m4/io-mapping-unmap-local.m4 new file mode 100644 index 0000000000000..282b77e11e1c5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/io-mapping-unmap-local.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # commit e66f6e095486f0210fcf3c5eb3ecf13fa348be4c +dnl # io-mapping: Provide iomap_local variant +dnl # +AC_DEFUN([AC_AMDGPU_IO_MAPPING_UNMAP_LOCAL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + io_mapping_unmap_local(NULL); + ], [ + AC_DEFINE(HAVE_IO_MAPPING_UNMAP_LOCAL, 1, [io_mapping_unmap_local() is available]) + ]) + ]) +]) diff --git 
a/drivers/gpu/drm/amd/dkms/m4/is_cow_mapping.m4 b/drivers/gpu/drm/amd/dkms/m4/is_cow_mapping.m4 new file mode 100644 index 0000000000000..116779b2674f0 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/is_cow_mapping.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # commit v5.12-rc2-346-g97a7e4733b9b +dnl # mm: introduce page_needs_cow_for_dma() for deciding whether cow +dnl # +AC_DEFUN([AC_AMDGPU_IS_COW_MAPPING], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + is_cow_mapping(VM_SHARED); + ], [ + AC_DEFINE(HAVE_IS_COW_MAPPING, 1, [is_cow_mapping() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/jiffies64_to_msecs.m4 b/drivers/gpu/drm/amd/dkms/m4/jiffies64_to_msecs.m4 new file mode 100644 index 0000000000000..29cc9b9271330 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/jiffies64_to_msecs.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # v5.1-rc3-699-g3b15d09f7e6d +dnl # time: Introduce jiffies64_to_msecs() +dnl # +AC_DEFUN([AC_AMDGPU_JIFFIES64_TO_MSECS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + jiffies64_to_msecs(0); + ],[jiffies64_to_msecs], [kernel/time/time.c], [ + AC_DEFINE(HAVE_JIFFIES64_TO_MSECS, 1, [jiffies64_to_msecs() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kallsyms-lookup-name.m4 b/drivers/gpu/drm/amd/dkms/m4/kallsyms-lookup-name.m4 new file mode 100644 index 0000000000000..4e123cddf2838 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kallsyms-lookup-name.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.6-11591-g0bd476e6c671 kallsyms: unexport kallsyms_lookup_name() and kallsyms_on_each_symbol() +dnl # v2.6.32-rc4-272-gf60d24d2ad04 hw-breakpoints: Fix broken hw-breakpoint sample module +dnl # +AC_DEFUN([AC_AMDGPU_KALLSYMS_LOOKUP_NAME], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + kallsyms_lookup_name(NULL); + ],[kallsyms_lookup_name],[kernel/kallsyms.c],[ + AC_DEFINE(HAVE_KALLSYMS_LOOKUP_NAME, 1, + [kallsyms_lookup_name is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 new file mode 100644 index 0000000000000..a391cda05a40f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 @@ -0,0 +1,704 @@ +dnl # +dnl # Default kernel configuration +dnl # +AC_DEFUN([AC_CONFIG_KERNEL], [ + AC_KERNEL + AC_KERNEL_SINGLE_TARGET + AC_AMDGPU_LINUX_HEADERS + AC_AMDGPU_DRM_HEADERS + AC_AMDGPU_KALLSYMS_LOOKUP_NAME + AC_KERNEL_SUPPORTED_AMD_CHIPS + AC_AMDGPU_IDR + AC_AMDGPU_TYPE__POLL_T + AC_AMDGPU_DMA_MAP_SGTABLE + AC_AMDGPU_I2C_NEW_CLIENT_DEVICE + AC_AMDGPU_I2C_LOCK_OPERATIONS_STRUCT + AC_AMDGPU_BACKLIGHT_DEVICE_SET_BRIGHTNESS + AC_AMDGPU_DEV_PM_SET_DRIVER_FLAGS + AC_AMDGPU_COMPAT_PTR_IOCTL + AC_AMDGPU___KTHREAD_SHOULD_PARK + AC_AMDGPU_LIST_ROTATE_TO_FRONT + AC_AMDGPU_LIST_IS_FIRST + AC_AMDGPU_ARCH_IO_RESERVE_FREE_MEMTYPE_WC + AC_AMDGPU_ACCESS_OK_WITH_TWO_ARGUMENTS + AC_AMDGPU_IN_COMPAT_SYSCALL + AC_AMDGPU_SEQ_HEX_DUMP + AC_AMDGPU_KSYS_SYNC_HELPER + AC_AMDGPU_PCI_UPSTREAM_BRIDGE + AC_AMDGPU_PCI_CONFIGURE_EXTENDED_TAGS + AC_AMDGPU_PCI + AC_AMDGPU_PCI_REBAR_BYTES_TO_SIZE + AC_AMDGPU_KTIME_GET_BOOTTIME_NS + AC_AMDGPU_KTIME_GET_RAW_NS + AC_AMDGPU_MEMALLOC_NOFS_SAVE + AC_AMDGPU_ZONE_MANAGED_PAGES + AC_AMDGPU_FAULT_FLAG_ALLOW_RETRY_FIRST + AC_AMDGPU_FSLEEP + AC_AMDGPU_VMF_INSERT + AC_AMDGPU_VMF_INSERT_MIXED_PROT + AC_AMDGPU_VMF_INSERT_PFN_PROT + AC_AMDGPU_VM_OPERATIONS_STRUCT_FAULT + AC_AMDGPU_MMU_NOTIFIER + AC_AMDGPU_MMU_NOTIFIER_SYNCHRONIZE + AC_AMDGPU_MMU_NOTIFIER_CALL_SRCU + AC_AMDGPU_MM_RELEASE_PAGES + 
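dnl # +dnl # Every entry in this list queues one asynchronous feature probe via AC_KERNEL_DO_BACKGROUND; AC_KERNEL_WAIT below joins them, and each success becomes a HAVE_* define in the generated config header. A hypothetical consumer in driver code, for illustration only: +dnl # +dnl # #ifdef HAVE_MM_RELEASE_PAGES_2ARGS +dnl # release_pages(pages, nr); +dnl # #else +dnl # release_pages(pages, nr, false); +dnl # #endif +dnl # +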
AC_AMDGPU_DMA_RESV + AC_AMDGPU_TTM_BUFFER_OBJECT + AC_AMDGPU_DEVCGROUP_CHECK_PERMISSION + AC_AMDGPU_HMM + AC_AMDGPU_INVALIDATE_RANGE_START + AC_AMDGPU_DOWN_WRITE_KILLABLE + AC_AMDGPU_INTERVAL_TREE_INSERT_HAVE_RB_ROOT_CACHED + AC_AMDGPU_GET_USER_PAGES_REMOTE + AC_AMDGPU_GET_USER_PAGES + AC_AMDGPU_DMA_BUF + AC_AMDGPU_LIST_FOR_EACH_ENTRY + AC_AMDGPU_AMD_IOMMU_PC_SUPPORTED + AC_AMDGPU_AMD_IOMMU_INVALIDATE_CTX + AC_AMDGPU_DEV_PAGEMAP + AC_AMDGPU_DOWN_READ_KILLABLE + AC_AMDGPU_DRM_CACHE + AC_AMDGPU_DRM_DEBUG_ENABLED + AC_AMDGPU_DRM_GEM_OBJECT_PUT + AC_AMDGPU_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS + AC_AMDGPU_DRM_CONNECTOR_INIT_WITH_DDC + AC_AMDGPU_DRM_DP_CALC_PBN_MODE + AC_AMDGPU_DRM_DP_ATOMIC_FUNCS + AC_AMDGPU_DRM_DP_SEND_REAL_EDID_CHECKSUM + AC_AMDGPU_DRM_DP_CEC_CORRELATION_FUNCTIONS + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_RESUME + AC_AMDGPU___DRM_ATOMIC_HELPER_CRTC_RESET + AC_AMDGPU_DRM_DRIVER_GEM_PRIME_RES_OBJ + AC_AMDGPU_DRM_DRV_GEM_PRIME_EXPORT + AC_AMDGPU_DRM_PRINT_BITS + AC_AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES + AC_AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT_SIGNAL + AC_AMDGPU_STRUCT_DRM_DEVICE + AC_AMDGPU_DRM_DRIVER_FEATURE + AC_AMDGPU___DRM_ATOMIC_HELPER_CRTC_RESET + AC_AMDGPU_PCI_PR3_PRESENT + AC_AMDGPU_KTHREAD_USE_MM + AC_AMDGPU_DRM_WRITEBACK_CONNECTOR_INIT + AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS_PREPARE_WRITEBACK_JOB + AC_AMDGPU_DRM_FB_HELPER_FILL_INFO + AC_AMDGPU_DRM_FB_HELPER_INIT + AC_AMDGPU_DRM_HELPER_FORCE_DISABLE_ALL + AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS + AC_AMDGPU_DRM_EDID + AC_AMDGPU_DRM_CONNECTOR_FOR_EACH_POSSIBLE_ENCODER + AC_AMDGPU_DRM_EDID_OVERRIDE_CONNECTOR_UPDATE + AC_AMDGPU_DRM_MODE_INIT + AC_AMDGPU_DRM_DP_MST_TOPOLOGY + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_CBS + AC_AMDGPU_DRM_ATOMIC_PRIVATE_OBJ_INIT + AC_AMDGPU_DRM_ATOMIC_PLANE_ENABLING + AC_AMDGPU_STRUCT_DRM_PLANE_HELPER_FUNCS + AC_AMDGPU_DRM_DP_MST_ATOMIC_CHECK + AC_AMDGPU_DRM_DP_MST_ATOMIC_ENABLE_DSC + AC_AMDGPU_DRM_CONNECTOR_HELPER_FUNCS + AC_AMDGPU_DRM_CONNECTOR_EDID_OVERRIDE + AC_AMDGPU_DRM_DP_MST_DETECT_PORT + AC_AMDGPU_STRUCT_DRM_CRTC_STATE + AC_AMDGPU_DRM_DP_MST_DSC_AUX_FOR_PORT + AC_AMDGPU_DRM_DP_MST_ADD_AFFECTED_DSC_CRTCS + AC_AMDGPU_DRM_CONNECTOR_HAVE_HDR_SINK_METADATA + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_INIT + AC_AMDGPU_DRM_MODE_CONFIG + AC_AMDGPU_DRM_VBLANK_CRTC_CONFIG + AC_AMDGPU_DRM_CONNECTOR_STATE_HDCP_CONTENT_TYPE + AC_AMDGPU_DRM_HDCP_UPDATE_CONTENT_PROTECTION + AC_AMDGPU_DRM_MODE_CREATE_COLORSPACE_PROPERTY_FUNCS + AC_AMDGPU_STRUCT_DRM_CONNECTOR_STATE + AC_AMDGPU_JIFFIES64_TO_MSECS + AC_AMDGPU_ACPI_PUT_TABLE + AC_AMDGPU_ACPI_DEV_GET_FIRST_MATCH_DEV + AC_AMDGPU_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS + AC_AMDGPU_DRM_FORMAT_INFO + AC_AMDGPU_STRUCT_DRM_CONNECTOR_STATE_COLORSPACE + AC_AMDGPU_STRUCT_DRM_ATOMIC_STATE_DUPLICATED + AC_AMDGPU_DRM_DP_SUBCONNECTOR + AC_AMDGPU_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY + AC_AMDGPU_DRM_PRIME_PAGES_TO_SG + AC_AMDGPU_DRM_CRTC_HELPER_FUNCS + AC_AMDGPU_DEBUGFS_CREATE_FILE_SIZE + AC_AMDGPU_DRM_DRIVER_GEM_OPEN_OBJECT + AC_AMDGPU_FS_RECLAIM_ACQUIRE + AC_AMDGPU_MEMALLOC_NORECLAIM_SAVE + AC_AMDGPU_PM_SUSPEND_VIA_FIRMWARE + AC_AMDGPU_SYSFS_EMIT + AC_AMDGPU_KTIME_IS_UNION + AC_AMDGPU_PXM_TO_NODE + AC_AMDGPU_ACPI_SRAT_GENERIC_AFFINITY + AC_AMDGPU_KERNEL_WRITE + AC_AMDGPU_STRUCT_XARRAY + AC_AMDGPU_MMPUT_ASYNC + AC_AMDGPU_DRM_MEMCPY_FROM_WC + AC_AMDGPU_IS_COW_MAPPING + AC_AMDGPU_VGA_REMOVE_VGACON + AC_AMDGPU_PCI_DRIVER_DEV_GROUPS + AC_AMDGPU_DRM_DISPLAY_INFO + AC_AMDGPU_IO_MAPPING_UNMAP_LOCAL + AC_AMDGPU_IO_MAPPING_MAP_LOCAL_WC + AC_AMDGPU_KMAP_LOCAL + AC_AMDGPU_DRM_DP_AUX_DRM_DEV + 
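dnl # +dnl # Naming convention: each AC_AMDGPU_<FEATURE> check lives in its own m4 file and, on success, AC_DEFINEs HAVE_<FEATURE>. A new check would follow the same template; a sketch with placeholder names, not an existing file: +dnl # +dnl # AC_DEFUN([AC_AMDGPU_MY_NEW_HELPER], [ +dnl # AC_KERNEL_DO_BACKGROUND([ +dnl # AC_KERNEL_TRY_COMPILE([ +dnl # #include <linux/mm.h> +dnl # ], [ +dnl # my_new_helper(NULL); +dnl # ], [ +dnl # AC_DEFINE(HAVE_MY_NEW_HELPER, 1, [my_new_helper() is available]) +dnl # ]) +dnl # ]) +dnl # ]) +dnl # +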
AC_AMDGPU_DRM_DP_LINK_TRAIN_CLOCK_RECOVERY_DELAY + AC_AMDGPU_DRM_DP_LINK_TRAIN_CHANNEL_EQ_DELAY + AC_AMDGPU_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL + AC_AMDGPU_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY + AC_AMDGPU_DRM_CONNECTOR_STATE_HDR_OUTPUT_METADATA + AC_AMDGPU_DRM_DEVICE_PDEV + AC_AMDGPU_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK + AC_AMDGPU_DRM_SIMPLE_ENCODER_INIT + AC_AMDGPU_DEV_IS_REMOVABLE + AC_AMDGPU_DRM_DP_UPDATE_PAYLOAD_PART1_START_SLOT_ARG + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_TOTAL_AVAIL_SLOTS + AC_AMDGPU_DRM_DISPLAY_INFO_IS_HDMI + AC_AMDGPU_DRM_BITMAP_FUNCS + AC_AMDGPU_STRUCT_KOBJ_TYPE + AC_AMDGPU_ATTRIBUTE_GROUP_IS_BIN_VISIBLE + AC_AMDGPU_MIGRATE_DISABLE + AC_AMDGPU_CLOSE_FD + AC_AMDGPU_DRM_DP_MST_HPD_IRQ_HANDLE_EVENT + AC_AMDGPU_DRM_VMA_OFFSET_NODE_READONLY_FIELD + AC_AMDGPU_WW_MUTEX_TRYLOCK_CONTEXT_ARG + AC_AMDGPU_DRM_APERTURE_REMOVE_CONFLICTING_PCI_FRAMEBUFFERS_DRM_DRIVER_ARG + AC_AMDGPU_SYNCHRONIZE_SHRINKERS + AC_AMDGPU_KREALLOC_ARRAY + AC_AMDGPU_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE + AC_AMDGPU_VMA_LOOKUP + AC_AMDGPU_DMA_FENCE_CHAIN_ALLOC + AC_AMDGPU_DMA_FENCE_CHAIN_STRUCT + AC_AMDGPU_DMA_FENCE_OPS_USE_64BIT_SEQNO + AC_AMDGPU_GENERIC_HANDLE_DOMAIN_IRQ + AC_AMDGPU__DMA_FENCE_IS_LATER + AC_AMDGPU_DRM_FIRMWARE_DRIVERS_ONLY + AC_AMDGPU_DMA_FENCE_DESCRIBE + AC_AMDGPU_DRM_KMS_HELPER_CONNECTOR_HOTPLUG_EVENT + AC_AMDGPU_PCIE_ASPM_ENABLED + AC_AMDGPU_PM_SUSPEND_TARGET_STATE + AC_AMDGPU_SMCA_GET_BANK_TYPE + AC_AMDGPU_MCE_PRIO_UC + AC_AMDGPU_X86_HYPERVISOR_TYPE + AC_AMDGPU_HYPERVISOR_IS_TYPE + AC_AMDGPU_PCI_DEV_LTR_PATH + AC_AMDGPU_DMA_FENCE_IS_CONTAINER + AC_AMDGPU_STR_YES_NO + AC_AMDGPU_TOTALRAM_PAGES + AC_AMDGPU_DMA_FENCE_CHAIN_CONTAINED + AC_AMDGPU_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_BASE + AC_AMDGPU_DRM_DSC_CONFIG_SIMPLE_422 + AC_AMDGPU_DRM_DP_READ_DPCD_CAPS + AC_AMDGPU_DRM_DP_REMOVE_RAYLOAD_PART + AC_AMDGPU_DRM_DSC_PPS_PAYLOAD_PACK + AC_AMDGPU_DRM_DSC_COMPUTE_RC_PARAMETERS + AC_AMDGPU_DRM_GEM_PLANE_HELPER_PREPARE_FB + AC_AMDGPU_BITMAP_TO_ARR32 + AC_AMDGPU_SHRINKER + AC_AMDGPU_STRUCT_DRM_EDID + AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX + AC_AMDGPU_DRM_DP_MST_PORT_FULL_PBN + AC_AMDGPU_ACPI_VIDEO_FUNCS + AC_AMDGPU_DRM_PLANE_HELPER_FUNCS + AC_AMDGPU_MEMORY_DEVICE_COHERENT + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_PAYLOADS + AC_AMDGPU_DRM_DP_MST_TOPOLOGY_STATE_PBN_DIV + AC_AMDGPU_MIGRATE_VMA_FAULT_PAGE + AC_AMDGPU_RB_ADD_CACHED + AC_AMDGPU_WANT_INIT_ON_FREE + AC_AMDGPU_APPLE_GMUX_DETECT + AC_AMDGPU_MM_KMALLOC_SIZE_ROUNDUP + AC_AMDGPU_ZONE_DEVICE_PAGE_INIT + AC_AMDGPU_DRM_SUBALLOC_MANAGER_INIT + AC_AMDGPU_VM_FLAGS_SET + AC_AMDGPU_MMAP_ASSERT_WRITE_LOCKED + AC_AMDGPU_DRM_GEM_PRIME_HANDLE_TO_DMABUF + AC_AMDGPU_PID_TYPE + AC_AMDGPU_FOLLOW_PFN + AC_AMDGPU_IMPORT_GUID + AC_AMDGPU_DRM_DP_MST_BRANCH_GUID_T + AC_AMDGPU_LIST_CMP_FUNC_IS_CONST_PARAM + AC_AMDGPU_DMA_FENCE_OPS_SET_DEADLINE + AC_AMDGPU_DRM_SHOW_FDINFO + AC_AMDGPU_LINUX_ATOMIC_LONG_TRY_CMPXCHG + AC_AMDGPU_LINUX_LOCAL_TRY_CMPXCHG + AC_AMDGPU_LINUX_DEVICE_CLASS + AC_AMDGPU_KVREALLOC + AC_AMDGPU_DMA_BUF_IS_DYNAMIC + AC_AMDGPU_RADIX_TREE_ITER_DELETE + AC_AMDGPU_KFIFO_PUT + AC_AMDGPU_DRM_CLIENT_REGISTER + AC_AMDGPU_DRM_COLOR_CTM_3X4 + AC_AMDGPU_DRM_DRIVER_GEM_PRIME_MMAP + AC_AMDGPU_DRM_GEM_PRIME_HANDLE_TO_FD + AC_AMDGPU_DMA_FENCE_TIMESTAMP + AC_AMDGPU_VMA_IS_INITIAL + AC_AMDGPU_CPUINFO_X86 + AC_AMDGPU_DMA_FENCE_IS_LATER_OR_SAME + AC_AMDGPU_WORKQUEUE + AC_AMDGPU_DRM_EXEC_INIT + AC_AMDGPU_DRM_DBG_PRINTER + AC_AMDGPU_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS + 
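dnl # +dnl # Two probe flavours appear in this list: AC_KERNEL_TRY_COMPILE only needs the conftest module to build, while AC_KERNEL_TRY_COMPILE_SYMBOL additionally requires the symbol to be exported (via Module.symvers or an EXPORT_SYMBOL in the named source file), so a helper that compiles but is never exported is not mistaken for a usable one. Compare the call shapes: +dnl # +dnl # AC_KERNEL_TRY_COMPILE([prologue], [body], [pass], [fail]) +dnl # AC_KERNEL_TRY_COMPILE_SYMBOL([prologue], [body], +dnl # [symbol], [source/file.c], [pass], [fail]) +dnl # +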
AC_AMDGPU_DRM_DEBUG_CATEGORY + AC_AMDGPU_SMCA_UMC_V2 + AC_AMDGPU_TOPOLOGY_NUM_CORES_PER_PACKAGE + AC_AMDGPU_DRM_CRTC_VBLANK_CRTC + AC_AMDGPU_DRM_DP_ADD_PAYLOAD_PART2_THREE_ARGUMENTS + AC_AMDGPU_ASSIGN_STR + + AC_KERNEL_WAIT + AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ + KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" + ]) + + AC_SUBST(KERNEL_MAKE) + AH_BOTTOM([#include "config-amd-chips.h"]) + AH_BOTTOM([#define AMDGPU_VERSION PACKAGE_VERSION]) +]) + +dnl # +dnl # Detect name used for Module.symvers file in kernel +dnl # +AC_DEFUN([AC_MODULE_SYMVERS], [ + modpost=$LINUX/scripts/Makefile.modpost + AC_MSG_CHECKING([kernel file name for module symbols]) + AS_IF([test -f "$modpost"], [ + AS_IF([grep -q Modules.symvers $modpost], [ + LINUX_SYMBOLS=Modules.symvers + ], [ + LINUX_SYMBOLS=Module.symvers + ]) + + AS_IF([test "x$enable_linux_builtin" != xyes -a ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [ + AC_MSG_ERROR([ + *** Please make sure the kernel devel package for your distribution + *** is installed. If you are building with a custom kernel, make sure the + *** kernel is configured, built, and the '--with-linux=PATH' configure + *** option refers to the location of the kernel source.]) + ]) + ], [ + LINUX_SYMBOLS=NONE + ]) + AC_MSG_RESULT($LINUX_SYMBOLS) + AC_SUBST(LINUX_SYMBOLS) +]) + +dnl # +dnl # Detect the kernel to be built against +dnl # +AC_DEFUN([AC_KERNEL], [ + AC_ARG_WITH([linux], + AS_HELP_STRING([--with-linux=PATH], + [Path to kernel source]), + [kernelsrc="$withval"]) + + AC_ARG_WITH(linux-obj, + AS_HELP_STRING([--with-linux-obj=PATH], + [Path to kernel build objects]), + [kernelbuild="$withval"]) + + AC_MSG_CHECKING([kernel source directory]) + AS_IF([test -z "$kernelsrc"], [ + AS_IF([test -e "/lib/modules/$KERNELVER/source"], [ + headersdir="/lib/modules/$KERNELVER/source" + sourcelink=$(readlink -f "$headersdir") + ], [test -e "/lib/modules/$KERNELVER/build"], [ + headersdir="/lib/modules/$KERNELVER/build" + sourcelink=$(readlink -f "$headersdir") + ], [ + sourcelink=$(ls -1d /usr/src/kernels/* \ + /usr/src/linux-* \ + 2>/dev/null | grep -v obj | tail -1) + ]) + + AS_IF([test -n "$sourcelink" && test -e ${sourcelink}], [ + kernelsrc=`readlink -f ${sourcelink}` + ], [ + kernelsrc="[Not found]" + ]) + ], [ + AS_IF([test "$kernelsrc" = "NONE"], [ + kernsrcver=NONE + ]) + withlinux=yes + ]) + + AC_MSG_RESULT([$kernelsrc]) + AS_IF([test ! -d "$kernelsrc"], [ + AC_MSG_ERROR([ + *** Please make sure the kernel devel package for your distribution + *** is installed and then try again. 
If that fails, you can specify the + *** location of the kernel source with the '--with-linux=PATH' option.]) + ]) + + AC_MSG_CHECKING([kernel build directory]) + AS_IF([test -z "$kernelbuild"], [ + AS_IF([test x$withlinux != xyes -a -e "/lib/modules/$KERNELVER/build"], [ + kernelbuild=`readlink -f /lib/modules/$KERNELVER/build` + ], [test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}], [ + kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu} + ], [test -d ${kernelsrc}-obj/${target_cpu}/default], [ + kernelbuild=${kernelsrc}-obj/${target_cpu}/default + ], [test -d `dirname ${kernelsrc}`/build-${target_cpu}], [ + kernelbuild=`dirname ${kernelsrc}`/build-${target_cpu} + ], [ + kernelbuild=${kernelsrc} + ]) + ]) + AC_MSG_RESULT([$kernelbuild]) + + AC_MSG_CHECKING([kernel source version]) + utsrelease1=$kernelbuild/include/linux/version.h + utsrelease2=$kernelbuild/include/linux/utsrelease.h + utsrelease3=$kernelbuild/include/generated/utsrelease.h + AS_IF([test -r $utsrelease1 && fgrep -q UTS_RELEASE $utsrelease1], [ + utsrelease=linux/version.h + ], [test -r $utsrelease2 && fgrep -q UTS_RELEASE $utsrelease2], [ + utsrelease=linux/utsrelease.h + ], [test -r $utsrelease3 && fgrep -q UTS_RELEASE $utsrelease3], [ + utsrelease=generated/utsrelease.h + ]) + + AS_IF([test "$utsrelease"], [ + kernsrcver=`(echo "#include <$utsrelease>"; + echo "kernsrcver=UTS_RELEASE") | + cpp -I $kernelbuild/include | + grep "^kernsrcver=" | cut -d \" -f 2` + + AS_IF([test -z "$kernsrcver"], [ + AC_MSG_RESULT([Not found]) + AC_MSG_ERROR([*** Cannot determine kernel version.]) + ]) + ], [ + AC_MSG_RESULT([Not found]) + if test "x$enable_linux_builtin" != xyes; then + AC_MSG_ERROR([*** Cannot find UTS_RELEASE definition.]) + else + AC_MSG_ERROR([ + *** Cannot find UTS_RELEASE definition. + *** Please run 'make prepare' inside the kernel source tree.]) + fi + ]) + + AC_MSG_RESULT([$kernsrcver]) + + LINUX=${kernelsrc} + LINUX_OBJ=${kernelbuild} + LINUX_VERSION=${kernsrcver} + build_dir_root=$(cd "${0%/*}" && pwd) + + AC_SUBST(LINUX) + AC_SUBST(LINUX_OBJ) + AC_SUBST(LINUX_VERSION) + + AC_MODULE_SYMVERS +]) + +dnl # +dnl # AC_KERNEL_CONFTEST_H +dnl # $1: contents to be filled in conftest.h +dnl # +AC_DEFUN([AC_KERNEL_CONFTEST_H], [ +cat - <<_ACEOF >conftest.h +$1 +_ACEOF +]) + +dnl # +dnl # AC_KERNEL_CONFTEST_C +dnl # fill in contents of conftest.h and $1 to conftest.c +dnl # $1: contents to be filled in conftest.c +dnl # +AC_DEFUN([AC_KERNEL_CONFTEST_C], [ +cat $build_dir_root/confdefs.h - <<_ACEOF >conftest.c +$1 +_ACEOF +]) + +dnl # +dnl # AC_KERNEL_LANG_PROGRAM([PROLOGUE], [BODY]) +dnl # +AC_DEFUN([AC_KERNEL_LANG_PROGRAM], [ +$1 +int +main (void) +{ +dnl Do *not* indent the following line: there may be CPP directives. +dnl Don't move the `;' right after for the same reason. +$2 + ; + return 0; +} +]) + +dnl # +dnl # AC_KERNEL_COMPILE_MODULE_IFELSE / like AC_COMPILE_IFELSE +dnl # $1: contents to be filled in conftest.c +dnl # $2: make target. +dnl # $3: user defined commands. It "AND" the make command to check the result. If true, expands to $4. Otherwise $5. +dnl # $4: run it if make & $3 pass. +dnl # $5: run it if make & $3 fail. +dnl # $6: contents to be filled in conftest.h. Could be null. 
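+dnl # +dnl # Example invocation (a sketch, not a check taken from this tree): build the default target and require the object file to exist: +dnl # +dnl # AC_KERNEL_COMPILE_MODULE_IFELSE([int x;], [conftest.o], +dnl # [test -s conftest.o], [have_x=yes], [have_x=no])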
+dnl # +AC_DEFUN([AC_KERNEL_COMPILE_MODULE_IFELSE], [ + m4_ifvaln([$1], [AC_KERNEL_CONFTEST_C([$1])]) + m4_ifvaln([$6], [AC_KERNEL_CONFTEST_H([$6])], [AC_KERNEL_CONFTEST_H([])]) + touch conftest.mod.c + if test "x$SINGLE_TARGET_BUILD_NO_TMP_VERSIONS" = x1; then + test -d $SINGLE_TARGET_BUILD_MODVERDIR || mkdir $SINGLE_TARGET_BUILD_MODVERDIR + rm -f $SINGLE_TARGET_BUILD_MODVERDIR/* + fi + echo "obj-m := conftest.o" >Makefile + kbuild_src_flag='' + kbuild_modpost_flag='KBUILD_MODPOST_NOFINAL=1 KBUILD_MODPOST_WARN=1' + kbuild_workaround_flag='' + kbuild_cc='' + if test -s ${LINUX_OBJ}/.config; then + if grep -q 'CONFIG_CC_IS_CLANG=y' "${LINUX_OBJ}/.config"; then + kbuild_cc='CC=clang' + fi + fi + test "x$enable_linux_builtin" = xyes && kbuild_src_flag='KBUILD_SRC=' # override KBUILD_SRC + test "x$enable_linux_builtin" = xyes && kbuild_workaround_flag='sub_make_done=' # override sub_make_done + AS_IF( + [AC_TRY_COMMAND(make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror -Wno-error=array-bounds" M=$PWD $kbuild_src_flag $kbuild_workaround_flag $kbuild_modpost_flag $kbuild_cc) >/dev/null && AC_TRY_COMMAND([$3])], + [$4], + [_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])] + ) +]) + +dnl # +dnl # AC_KERNEL_TMP_BUILD_DIR +dnl # $1: contents to be executed in a temporary directory +dnl # +AC_DEFUN([AC_KERNEL_TMP_BUILD_DIR], [ + build_dir=$(mktemp -d -t build_XXXXXXXX -p $build_dir_root) + cd $build_dir + $1 + AS_IF([test -s confdefs.h], [ + cat confdefs.h >>$build_dir_root/confdefs.h + ]) + cd $build_dir_root + rm -rf $build_dir +]) + +dnl # +dnl # AC_KERNEL_TRY_COMPILE_MODULE like AC_TRY_COMPILE +dnl # $1: Prologue for conftest.c. including header files, extends, etc +dnl # $2: Body for conftest.c. +dnl # $3: run it if compile pass. +dnl # $4: run it if compile fail. +dnl # +AC_DEFUN([AC_KERNEL_TRY_COMPILE_MODULE], + target='conftest.o' + [AC_KERNEL_COMPILE_MODULE_IFELSE( + [AC_LANG_SOURCE([AC_KERNEL_LANG_PROGRAM([[$1]], [[$2]])])], + [$target], + [test -s conftest.o], + [$3], [$4]) +]) + +dnl # +dnl # AC_KERNEL_COMPILE_IFELSE / like AC_COMPILE_IFELSE +dnl # $1: contents to be filled in conftest.c +dnl # $2: user defined commands. It "AND" the make command to check the result. If true, expands to $4. Otherwise $5. +dnl # $3: run it if make & $3 pass. +dnl # $4: run it if make & $3 fail. +dnl # $5: contents to be filled in conftest.h. Could be null. +dnl # +AC_DEFUN([AC_KERNEL_COMPILE_IFELSE], [ + m4_ifvaln([$1], [AC_KERNEL_CONFTEST_C([$1])]) + m4_ifvaln([$5], [AC_KERNEL_CONFTEST_H([$5])], [AC_KERNEL_CONFTEST_H([])]) + AS_IF( + [AC_TRY_COMMAND(eval $CC $CFLAGS) > /dev/null && AC_TRY_COMMAND([$2])], + [$3], + [_AC_MSG_LOG_CONFTEST m4_ifvaln([$4],[$4])] + ) +]) +dnl # +dnl # AC_KERNEL_TRY_COMPILE like AC_TRY_COMPILE +dnl # $1: Prologue for conftest.c. including header files, extends, etc +dnl # $2: Body for conftest.c. +dnl # $3: run it if compile pass. +dnl # $4: run it if compile fail. +dnl # +AC_DEFUN([AC_KERNEL_TRY_COMPILE], + [AC_KERNEL_COMPILE_IFELSE( + [AC_LANG_SOURCE([AC_KERNEL_LANG_PROGRAM([[$1]], [[$2]])])], + [test -s conftest.o || test -s .tmp_conftest.o], + [$3], [$4]) +]) + +dnl # +dnl # AC_KERNEL_CHECK_SYMBOL_EXPORT +dnl # check symbol exported or not +dnl # $1: symbol list to look for +dnl # $2: file list to look for $1 +dnl # $3: run it if pass. +dnl # $4: run it if fail. 
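+dnl # +dnl # Example invocation (a sketch, using the symbol and file that the mmput_async probe below passes): +dnl # +dnl # AC_KERNEL_CHECK_SYMBOL_EXPORT([mmput_async], [kernel/fork.c], +dnl # [have_mmput_async=yes], [have_mmput_async=no])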
+dnl # +AC_DEFUN([AC_KERNEL_CHECK_SYMBOL_EXPORT], [ + awk -v s="$1" ' + BEGIN { + n = 0; + num = split(s, symbols, " ") + } { + for (i in symbols) + if (symbols[[i]] == $[2]) + n++ + } END { + if (num == n) + exit 0; + else + exit 1 + }' $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null + rc=$? + if test $rc -ne 0; then + n=0 + export=0 + for file in $2; do + n=$(awk -v s="$1" ' + BEGIN { + n = 0; + split(s, symbols, " ") + } { + for (i in symbols) { + s="EXPORT_SYMBOL.*\\("symbols[[i]]"\\)" + if ($[0] ~ s) + n++ + } + } END { + print n + }' $LINUX/$file 2>/dev/null) + rc=$? + if test $rc -eq 0; then + export=$(( $export+$n )) + fi + done + if test $(echo "$1" | wc -w) -eq $export; then : + $3 + else : + $4 + fi + else : + $3 + fi +]) + +dnl # +dnl # AC_KERNEL_TRY_COMPILE_SYMBOL +dnl # like AC_KERNEL_TRY_COMPILE, except AC_KERNEL_CHECK_SYMBOL_EXPORT +dnl # is called if not compiling for builtin +dnl # $1: Prologue for conftest.c. including header files, extends, etc +dnl # $2: Body for conftest.c. +dnl # $3: AC_KERNEL_CHECK_SYMBOL_EXPORT $1 +dnl # $4: AC_KERNEL_CHECK_SYMBOL_EXPORT $2 +dnl # $5: run it if checking pass +dnl # $6: run it if checking fail +dnl # +AC_DEFUN([AC_KERNEL_TRY_COMPILE_SYMBOL], [ + AC_KERNEL_TRY_COMPILE([$1], [$2], [rc=0], [rc=1]) + if test $rc -ne 0; then : + $6 + else + AC_KERNEL_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1]) + if test $rc -ne 0; then : + $6 + else : + $5 + fi + fi +]) + +dnl # +dnl # AC_KERNEL_TEST_HEADER_FILE_EXIST +dnl # check header file exist +dnl # $1: header file to check +dnl # $2: run it if header file exist +dnl # $3: run it if header file nonexistent +dnl # +AC_DEFUN([AC_KERNEL_TEST_HEADER_FILE_EXIST], [ + header_file=m4_normalize([$1]) + header_file_obj=$LINUX_OBJ/include/$header_file + header_file_src=$LINUX/include/$header_file + AS_IF([test -e $header_file_obj -o -e $header_file_src], [ + $2 + ], [ + $3 + ]) +]) + +dnl # +dnl # AC_KERNEL_CHECK_HEADERS +dnl # check whether header file(s) is(are) present +dnl # $1: header filei(s) to check +dnl # +AC_DEFUN([AC_KERNEL_CHECK_HEADERS], [ + AC_CHECK_HEADERS([$1],[AS_TR_CPP([HAVE_$1])=1],,[-]) +]) + +dnl # +dnl # AC_KERNEL_DO_BACKGROUND +dnl # $1: contents to be executed +dnl # +AC_DEFUN([AC_KERNEL_DO_BACKGROUND], [ + do_background() { + AC_KERNEL_TMP_BUILD_DIR([$1]) + } + + AC_CHECK_PROG(NPROC, nproc, yes) + AS_IF([test x"$NPROC" != x"yes"], [ + ncpu=1 + ], [ + ncpu=$(nproc) + ]) + + while [[ $(jobs | wc -l) -gt $ncpu ]] + do + sleep 0.1 + done + + do_background & + procs="$! $procs" +]) + +dnl # +dnl # AC_KERNEL_WAIT +dnl # wait for all tests to be finished +dnl # +AC_DEFUN([AC_KERNEL_WAIT], [ + AC_MSG_CHECKING([for module configuration]) + wait $procs + AS_IF([[[ $? 
-eq 0 ]]], [ + AC_MSG_RESULT([done]) + ], [ + AC_MSG_RESULT([failed]) + ]) +]) + +dnl # +dnl # AC_KERNEL_SUPPORTED_AMD_CHIPS +dnl # get list of graphics chips supported by the amdgpu kernel driver +dnl # +AC_DEFUN([AC_KERNEL_SUPPORTED_AMD_CHIPS], [ + AC_MSG_CHECKING([for supported chips]) + AS_IF([test $HAVE_DRM_AMD_ASIC_TYPE_H], [ + chips=$(awk 'BEGIN {enum = 0} { + if ($[0] ~ "^enum amd_asic_type") + enum = 1; + if (enum && $[1] ~ "CHIP_") { + gsub(",", ""); + if ($[1] == "CHIP_LAST") + exit; + print $[1]; + } + }' ../../include/drm/amd_asic_type.h) + + for i in $chips; do + $as_echo "#define HAVE_$i" >>config/config-amd-chips.h + done + AC_MSG_RESULT([done]) + ], [ + AC_MSG_RESULT([failed]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel_single_target.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel_single_target.m4 new file mode 100644 index 0000000000000..91b36b22a7824 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kernel_single_target.m4 @@ -0,0 +1,64 @@ +dnl # +dnl # extract cc, cflags, cppflags +dnl # +AC_DEFUN([AC_KERNEL_SINGLE_TARGET_CFLAGS], [ + AS_IF([test -s .conftest.o.cmd], [ + _conftest_cmd=$(head -1 .conftest.o.cmd) + + CC=$(echo $_conftest_cmd | awk -F ' ' '{print $[3]}') + + CFLAGS=$(echo $_conftest_cmd | \ + cut -d ' ' -f 4- | \ + sed -e "s|\./|${LINUX_OBJ}/|g" \ + -e "s|-I\([[[a-z]]]*\)|-I${LINUX_OBJ}/\1|g" \ + -e "s|-include \([[[a-z]]]*\)|-include ${LINUX_OBJ}/\1|g" \ + -e "s|$PWD|\${PWD}|g") + + CPPFLAGS=$(echo $CFLAGS | \ + cut -d ';' -f 1 | \ + sed 's| -|\n&|g' | \ + sed -n -e '/conftest/d' \ + -e '/KBUILD/d' \ + -e '/-I/p; /-include/p; /-isystem/p; /-D/p' | \ + xargs) + + CFLAGS=$(echo $CFLAGS | \ + sed -e "s|nostdinc|nostdinc -I../tiny_wrapper/include|") + + AC_SUBST(CC) + AC_SUBST(CFLAGS) + AC_SUBST(CPPFLAGS) + ], [ + AC_MSG_ERROR([cannot detect CFLAGS...]) + ]) +]) + +dnl # +dnl # v5.3-rc4-54-g54b8ae66ae1a +dnl # kbuild: change *FLAGS_.o to take the path relative to $(obj) +dnl # +AC_DEFUN([AC_KERNEL_FLAGS_TAKE_PATH], [ + AS_IF([grep -qsm 1 "target-stem" ${LINUX}/scripts/Makefile.lib], [ + AC_DEFINE(HAVE_AMDKCL_FLAGS_TAKE_PATH, 1, + [*FLAGS_.o support to take the path relative to $(obj)]) + ]) +]) + +dnl # +dnl # v4.20-rc2-10-ge07db28eea38 +dnl # kbuild: fix single target build for external module +dnl # +AC_DEFUN([AC_KERNEL_SINGLE_TARGET], [ + AC_KERNEL_TMP_BUILD_DIR([ + AC_KERNEL_TRY_COMPILE_MODULE([], [], [], [ + SINGLE_TARGET_BUILD_MODVERDIR=.tmp_versions + AS_IF([test ! 
-d $SINGLE_TARGET_BUILD_MODVERDIR], [ + SINGLE_TARGET_BUILD_NO_TMP_VERSIONS=1 + ], [ + AC_MSG_WARN([single target compile failed as expected]) + ]) + ]) + AC_KERNEL_SINGLE_TARGET_CFLAGS + AC_KERNEL_FLAGS_TAKE_PATH + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel_write.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel_write.m4 new file mode 100644 index 0000000000000..3fdd8e902d61e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kernel_write.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit v4.13-rc7-6-ge13ec939e96b +dnl # fs: fix kernel_write prototype +dnl # +AC_DEFUN([AC_AMDGPU_KERNEL_WRITE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + kernel_write(NULL, NULL, 0, NULL); + ], [ + AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1, + [kernel_write() takes the position argument as a pointer]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kfd-close-fd.m4 b/drivers/gpu/drm/amd/dkms/m4/kfd-close-fd.m4 new file mode 100644 index 0000000000000..82b1e366bd09a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kfd-close-fd.m4 @@ -0,0 +1,27 @@ +dnl # +dnl # commit 8760c909f54a82aaa6e76da19afe798a0c77c3c3 +dnl # file: Rename __close_fd to close_fd and remove the files parameter +dnl # +AC_DEFUN([AC_AMDGPU_CLOSE_FD], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + close_fd(0); + ], [ + AC_DEFINE(HAVE_KERNEL_CLOSE_FD, 1, [close_fd() is available]) + ], [ + dnl # + dnl # commit 16a78543a1d3537645de737934b9387c42bfb53b + dnl # drm/amdkcl: fix for close_fd not defined + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ksys_close(0); + ], [ + AC_DEFINE(HAVE_KSYS_CLOSE_FD, 1, [ksys_close() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kfifo_put.m4 b/drivers/gpu/drm/amd/dkms/m4/kfifo_put.m4 new file mode 100644 index 0000000000000..6668ba3281246 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kfifo_put.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # v3.12-8403-g498d319bb512 +dnl # kfifo API type safety +dnl # +AC_DEFUN([AC_AMDGPU_KFIFO_PUT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + static DEFINE_KFIFO(fifo, int, 2); + kfifo_put(&fifo, 0); + ],[ + AC_DEFINE(HAVE_KFIFO_PUT_NON_POINTER, 1, + [kfifo_put() takes a non-pointer parameter]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/krealloc-array.m4 b/drivers/gpu/drm/amd/dkms/m4/krealloc-array.m4 new file mode 100644 index 0000000000000..0cd6663de85ce --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/krealloc-array.m4 @@ -0,0 +1,40 @@ +dnl # +dnl # v5.10-13-gf0dbd2bd1c22 +dnl # mm: slab: provide krealloc_array() +dnl # +AC_DEFUN([AC_AMDGPU_KREALLOC_ARRAY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + void *p = krealloc_array(NULL, 0, 0, GFP_KERNEL); + (void)p; + ], [ + AC_DEFINE(HAVE_KREALLOC_ARRAY, 1, + [krealloc_array() is available]) + ]) + ]) +]) + +dnl # +dnl # +dnl # v5.15-11-g8587ca6f3415 mm: move kvmalloc-related functions to slab.h +dnl # v5.14-rc4-23-gde2860f46362 mm: Add kvrealloc() +dnl # +AC_DEFUN([AC_AMDGPU_KVREALLOC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + #include + ], [ + void *p = NULL; + p = kvrealloc(NULL, 0, 0, GFP_KERNEL); + ], [ + AC_DEFINE(HAVE_KVREALLOC, 1, + [kvrealloc() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/ksys_sync_helper.m4 b/drivers/gpu/drm/amd/dkms/m4/ksys_sync_helper.m4 new file mode 100644 index 0000000000000..039aafc937e0c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ksys_sync_helper.m4 @@ -0,0 +1,16 @@ +dnl # +dnl #
commit b5dee3130bb4014511f5d0dd46855ed843e3fdc8 +dnl # PM / sleep: Refactor filesystems sync to reduce duplication +dnl # +AC_DEFUN([AC_AMDGPU_KSYS_SYNC_HELPER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ksys_sync_helper(); + ], [ + AC_DEFINE(HAVE_KSYS_SYNC_HELPER, 1, + [ksys_sync_helper() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kthread_use_mm.m4 b/drivers/gpu/drm/amd/dkms/m4/kthread_use_mm.m4 new file mode 100644 index 0000000000000..6177b3b6fa49e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/kthread_use_mm.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # f5678e7f2ac3 kernel: better document the use_mm/unuse_mm API contract +dnl # 9bf5b9eb232b kernel: move use_mm/unuse_mm to kthread.c +dnl # +AC_DEFUN([AC_AMDGPU_KTHREAD_USE_MM], [ + AC_KERNEL_DO_BACKGROUND([ + dnl # + dnl # sle sp2 server distro inlines kthread_use_mm/kthread_unuse_mm + dnl # in mmu_context.h + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + #include + #include + ], [ + kthread_use_mm(NULL); + kthread_unuse_mm(NULL); + ], [ + AC_DEFINE(HAVE_KTHREAD_USE_MM, 1, + [kthread_{use,unuse}_mm() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/ktime-get-boottime-ns.m4 b/drivers/gpu/drm/amd/dkms/m4/ktime-get-boottime-ns.m4 new file mode 100644 index 0000000000000..234ef7efa54b4 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ktime-get-boottime-ns.m4 @@ -0,0 +1,32 @@ +dnl # +dnl # commit v5.2-rc5-8-g9285ec4c8b61 +dnl # timekeeping: Use proper clock specifier names in functions +dnl # +AC_DEFUN([AC_AMDGPU_KTIME_GET_BOOTTIME_NS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ktime_get_boottime_ns(); + ], [ + AC_DEFINE(HAVE_KTIME_GET_BOOTTIME_NS, 1, + [ktime_get_boottime_ns() is available]) + AC_DEFINE(HAVE_KTIME_GET_NS, 1, + [ktime_get_ns is available]) + ],[ + dnl # + dnl # commit v3.16-rc5-76-g897994e32b2b + dnl # timekeeping: Provide ktime_get[*]_ns() helpers + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + ktime_get_ns(); + ], [ + AC_DEFINE(HAVE_KTIME_GET_NS, 1, + [ktime_get_ns is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/ktime-get-raw-ns.m4 b/drivers/gpu/drm/amd/dkms/m4/ktime-get-raw-ns.m4 new file mode 100644 index 0000000000000..e6ae5ff3a6fe0 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ktime-get-raw-ns.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v3.16-rc5-99-gf519b1a2e08c +dnl # timekeeping: Provide ktime_get_raw() +dnl # Provide a ktime_t based interface for raw monotonic time. 
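+dnl # +dnl # As in the boottime check above, a probe may chain an older API in its fail branch, and driver code then selects at compile time. A sketch, assuming the pre-rename helper was ktime_get_boot_ns(): +dnl # +dnl # #ifdef HAVE_KTIME_GET_BOOTTIME_NS +dnl # u64 now = ktime_get_boottime_ns(); +dnl # #else +dnl # u64 now = ktime_get_boot_ns(); +dnl # #endif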
+dnl # +AC_DEFUN([AC_AMDGPU_KTIME_GET_RAW_NS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + ktime_get_raw_ns(); + ], [ + AC_DEFINE(HAVE_KTIME_GET_RAW_NS, 1, + [ktime_get_raw_ns is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/ktime-is-union.m4 b/drivers/gpu/drm/amd/dkms/m4/ktime-is-union.m4 new file mode 100644 index 0000000000000..0bd3f631e535f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ktime-is-union.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v4.18-rc1-35-ga8802d97e733 +dnl # ktime: Get rid of the union +dnl # +AC_DEFUN([AC_AMDGPU_KTIME_IS_UNION], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + ktime_t t; + t.tv64 = 0; + ], [ + AC_DEFINE(HAVE_KTIME_IS_UNION, 1, + [ktime_t is union]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/linux-headers.m4 b/drivers/gpu/drm/amd/dkms/m4/linux-headers.m4 new file mode 100644 index 0000000000000..4df530c27442c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/linux-headers.m4 @@ -0,0 +1,142 @@ +AC_DEFUN([AC_AMDGPU_LINUX_HEADERS], [ + + dnl # + dnl # commit 8bd9cb51daac89337295b6f037b0486911e1b408 + dnl # locking/atomics, asm-generic: Move some macros from + dnl # to a new file + dnl # + AC_KERNEL_CHECK_HEADERS([linux/bits.h]) + + dnl # + dnl # commit v4.3-rc4-1-g2f8e2c877784 + dnl # move io-64-nonatomic*.h out of asm-generic + dnl # + AC_KERNEL_CHECK_HEADERS([linux/io-64-nonatomic-lo-hi.h]) + + dnl # + dnl # commit 299878bac326c890699c696ebba26f56fe93fc75 + dnl # treewide: move set_memory_* functions away from cacheflush.h + dnl # + AC_KERNEL_CHECK_HEADERS([asm/set_memory.h]) + + dnl # + dnl # commit df6b35f409af0a8ff1ef62f552b8402f3fef8665 + dnl # x86/fpu: Rename i387.h to fpu/api.h + dnl # + AC_KERNEL_CHECK_HEADERS([asm/fpu/api.h]) + + dnl # + dnl # v4.19-rc6-7-ga3f8a30f3f00 + dnl # Compiler Attributes: use feature checks instead of version checks + dnl # + AC_KERNEL_CHECK_HEADERS([linux/compiler_attributes.h]) + + dnl # + dnl # commit b3dfbdf261e076a997f812323edfdba84ba80256 + dnl # dma-buf/fence: add fence_array fences v6 + dnl # + AC_KERNEL_CHECK_HEADERS([linux/fence-array.h]) + + dnl # + dnl # v5.3-rc1-449-g52791eeec1d9 + dnl $ dma-buf: rename reservation_object to dma_resv + dnl # + AC_KERNEL_CHECK_HEADERS([linux/dma-resv.h]) + + dnl # + dnl # v5.7-13149-g9740ca4e95b4 + dnl # mmap locking API: initial implementation as rwsem wrappers + dnl # + AC_KERNEL_CHECK_HEADERS([linux/mmap_lock.h]) + + dnl # + dnl # v4.19-rc4-1-g52916982af48 + dnl # PCI/P2PDMA: Support peer-to-peer memory + dnl # + AC_KERNEL_CHECK_HEADERS([linux/pci-p2pdma.h]) + + dnl # + dnl # v4.7-11546-g00085f1efa38 + dnl # dma-mapping: use unsigned long for dma_attrs + dnl # + AC_KERNEL_CHECK_HEADERS([linux/dma-attrs.h]) + + dnl # + dnl # 01fd30da0474 + dnl # dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr + dnl # + AC_KERNEL_CHECK_HEADERS([linux/dma-buf-map.h]) + + dnl # + dnl # 7938f4218168 + dnl # dma-buf: dma-buf-map: Rename to iosys-map + dnl # + AC_KERNEL_CHECK_HEADERS([linux/iosys-map.h]) + + dnl # + dnl # v5.14-rc5-11-gc0891ac15f04 + dnl # isystem: ship and use stdarg.h + dnl # + AC_KERNEL_CHECK_HEADERS([linux/stdarg.h]) + + dnl # + dnl # v5.0-1331-g7bf60c52e093 + dnl # dma-buf: add new dma_fence_chain container v7 + dnl # + AC_KERNEL_CHECK_HEADERS([linux/dma-fence-chain.h]) + + dnl # + dnl # v4.16-11455-gf6bb2a2c0b81 + dnl # xarray: add the xa_lock to the radix_tree_root + dnl # + AC_KERNEL_CHECK_HEADERS([linux/xarray.h]) + + dnl # + dnl # 
v5.15-272-gd2a8ebbf8192 + dnl # kernel.h: split out container_of() and typeof_member() macros + dnl # + AC_KERNEL_CHECK_HEADERS([linux/container_of.h]) + + dnl # + dnl # v5.15-rc4-2-g46b49b12f3fc + dnl # arch/cc: Introduce a function to check for confidential computing features + dnl # + AC_KERNEL_CHECK_HEADERS([linux/cc_platform.h]) + + dnl # + dnl # v4.12-rc3-120-gfd851a3cdc19 + dnl # spin loop primitives for busy waiting + dnl # + AC_KERNEL_CHECK_HEADERS([linux/processor.h]) + + dnl # + dnl # v5.9-rc6-311-g0a0f0d8be76d + dnl # dma-mapping: split + dnl # + AC_KERNEL_CHECK_HEADERS([linux/dma-map-ops.h]) + + dnl #v4.5-rc3-203-g2413306c2566 + dnl #apple-gmux: Add helper for presence detect + dnl + AC_KERNEL_CHECK_HEADERS([linux/apple-gmux.h]) + + dnl #v5.5-rc2-6-ga8ae608529ab + dnl #device.h: move 'struct class' stuff out to device/class.h + dnl + AC_KERNEL_CHECK_HEADERS([linux/device/class.h]) + + dnl #v4.12-10499-gbc6245e5efd7 + dnl #bug: split BUILD_BUG stuff out into + dnl + AC_KERNEL_CHECK_HEADERS([linux/build_bug.h]) + + dnl #v6.7-rc1-2-g58e82a62669d + dnl #platform/x86/amd: Add support for AMD ACPI based Wifi band RFI mitigation feature + dnl + AC_KERNEL_CHECK_HEADERS([linux/acpi_amd_wbrf.h]) + + dnl #v5.5-5479-g23331e489361 + dnl #include/linux/units.h: add helpers for kelvin to/from Celsius conversion + dnl + AC_KERNEL_CHECK_HEADERS([linux/units.h]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/list-is-first.m4 b/drivers/gpu/drm/amd/dkms/m4/list-is-first.m4 new file mode 100644 index 0000000000000..566de635a47da --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/list-is-first.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit 70b44595eafe9c7c235f076d653a268ca1ab9fdb +dnl # Author: Mel Gorman +dnl # Date: Tue Mar 5 15:44:54 2019 -0800 +dnl # mm, compaction: use free lists to quickly locate a migration source +dnl # +AC_DEFUN([AC_AMDGPU_LIST_IS_FIRST], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + list_is_first(NULL, NULL); + ], [ + AC_DEFINE(HAVE_LIST_IS_FIRST, 1, + [list_is_first() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/list-rotate_to_front.m4 b/drivers/gpu/drm/amd/dkms/m4/list-rotate_to_front.m4 new file mode 100644 index 0000000000000..2e914f0314652 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/list-rotate_to_front.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit a16b53849913e742d086bb2b6f5e069ea2850c56 +dnl # Author: Tobin C. 
Harding +dnl # Date: Mon May 13 17:15:59 2019 -0700 +dnl # list: add function list_rotate_to_front() +dnl # +AC_DEFUN([AC_AMDGPU_LIST_ROTATE_TO_FRONT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + list_rotate_to_front(NULL, NULL); + ], [ + AC_DEFINE(HAVE_LIST_ROTATE_TO_FRONT, 1, + [list_rotate_to_front() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/list-sort.m4 b/drivers/gpu/drm/amd/dkms/m4/list-sort.m4 new file mode 100644 index 0000000000000..aa7b739d04961 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/list-sort.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v5.12-rc6-9-g4f0f586bf0c8 +dnl # treewide: Change list_sort to use const pointers +dnl # +AC_DEFUN([AC_AMDGPU_LIST_CMP_FUNC_IS_CONST_PARAM], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + list_cmp_func_t cmp = NULL; + struct list_head a, b; + cmp(NULL, &a, &b); + ], [ + AC_DEFINE(HAVE_LIST_CMP_FUNC_IS_CONST_PARAM, 1, + [list_cmp_func_t takes const pointer parameters]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/ltr_path.m4 b/drivers/gpu/drm/amd/dkms/m4/ltr_path.m4 new file mode 100644 index 0000000000000..bb3cf8a0beff9 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ltr_path.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit c46fd358070f22ba68d6e74c22016a33b914c20a +dnl # PCI/ASPM: Enable Latency Tolerance Reporting when supported +dnl # +dnl # +AC_DEFUN([AC_AMDGPU_PCI_DEV_LTR_PATH], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct pci_dev *dev = NULL; + dev->ltr_path = 0; + ], [ + AC_DEFINE(HAVE_PCI_DEV_LTR_PATH, 1, + [struct pci_dev->ltr_path is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mce_notifier_prios.m4 b/drivers/gpu/drm/amd/dkms/m4/mce_notifier_prios.m4 new file mode 100644 index 0000000000000..de3f546345a28 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mce_notifier_prios.m4 @@ -0,0 +1,34 @@ +dnl # +dnl # +dnl # v5.5-rc2-5-g8438b84ab42d x86/mce: Take action on UCNA/Deferred errors again +dnl # +AC_DEFUN([AC_AMDGPU_MCE_PRIO_UC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + enum mce_notifier_prios pri; + pri = MCE_PRIO_UC; + ], [ + AC_DEFINE(HAVE_MCE_PRIO_UC, 1, + [enum MCE_PRIO_UC is available]) + ]) + ]) +]) +dnl # +dnl # v5.13-rc3-1-g94a311ce248e +dnl # x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types +dnl # +AC_DEFUN([AC_AMDGPU_SMCA_UMC_V2], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + enum smca_bank_types bank_type; + bank_type = SMCA_UMC_V2; + ], [ + AC_DEFINE(HAVE_SMCA_UMC_V2, 1, + [enum SMCA_UMC_V2 is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/memalloc-nofs-save.m4 b/drivers/gpu/drm/amd/dkms/m4/memalloc-nofs-save.m4 new file mode 100644 index 0000000000000..64d78a728898d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/memalloc-nofs-save.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit 7dea19f9ee636cb244109a4dba426bbb3e5304b7 +dnl # mm: introduce memalloc_nofs_{save,restore} API +dnl # +AC_DEFUN([AC_AMDGPU_MEMALLOC_NOFS_SAVE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + memalloc_nofs_save(); + memalloc_nofs_restore(0); + ], [ + AC_DEFINE(HAVE_MEMALLOC_NOFS_SAVE, 1, + [memalloc_nofs_{save,restore}() are available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/memalloc_noreclaim_save.m4 b/drivers/gpu/drm/amd/dkms/m4/memalloc_noreclaim_save.m4 new file mode 100644 index 0000000000000..f9d0ba9a842cf --- /dev/null +++ 
b/drivers/gpu/drm/amd/dkms/m4/memalloc_noreclaim_save.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 4e544bac8267f65a0bf06aed1bde9964da4812ed +dnl # PCI: Add pci_dev_id() helper +dnl # +AC_DEFUN([AC_AMDGPU_MEMALLOC_NORECLAIM_SAVE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + memalloc_noreclaim_save(); + ], [ + AC_DEFINE(HAVE_MEMALLOC_NORECLAIM_SAVE, 1, + [memalloc_noreclaim_save() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/memremap-enum.m4 b/drivers/gpu/drm/amd/dkms/m4/memremap-enum.m4 new file mode 100644 index 0000000000000..53d34285e3b83 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/memremap-enum.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # commit f25cbb7a95a24ff9a2a3bebd308e303942ae6b2c +dnl # mm: add zone device coherent type memory support +dnl # +dnl # commit dd19e6d8ffaa1289d75d7833de97faf1b6b2c8e4 +dnl # mm: add device coherent vma selection for memory migration +dnl # +AC_DEFUN([AC_AMDGPU_MEMORY_DEVICE_COHERENT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + int v, w; + v = MEMORY_DEVICE_COHERENT; + w = MIGRATE_VMA_SELECT_DEVICE_COHERENT; + ], [ + AC_DEFINE(HAVE_DEVICE_COHERENT, 1, + [MEMORY_DEVICE_COHERENT is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/migrate_disable.m4 b/drivers/gpu/drm/amd/dkms/m4/migrate_disable.m4 new file mode 100644 index 0000000000000..5ffb95e258143 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/migrate_disable.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.6-rc2-1-g66630058e56b +dnl # sched/rt: Provide migrate_disable/enable() inlines +dnl # +AC_DEFUN([AC_AMDGPU_MIGRATE_DISABLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + migrate_disable(); + ],[ + AC_DEFINE(HAVE_MIGRATE_DISABLE, 1, + [migrate_disable() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/migrate_vma_fault_page.m4 b/drivers/gpu/drm/amd/dkms/m4/migrate_vma_fault_page.m4 new file mode 100644 index 0000000000000..f989b29503c45 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/migrate_vma_fault_page.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit v6.0-rc3-595-g16ce101db85d +dnl # mm/memory.c: fix race when faulting a device private page +dnl # +AC_DEFUN([AC_AMDGPU_MIGRATE_VMA_FAULT_PAGE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct migrate_vma mig = {0}; + struct page *fault_page = NULL; + mig.fault_page = fault_page; + ], [ + AC_DEFINE(HAVE_MIGRATE_VMA_FAULT_PAGE, 1, + [struct migrate_vma has fault_page]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/mm-kmalloc_size_roundup.m4 b/drivers/gpu/drm/amd/dkms/m4/mm-kmalloc_size_roundup.m4 new file mode 100644 index 0000000000000..108c7086638bd --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mm-kmalloc_size_roundup.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v6.0-rc2-7-g05a940656e1e +dnl # slab: Introduce kmalloc_size_roundup() +dnl # +AC_DEFUN([AC_AMDGPU_MM_KMALLOC_SIZE_ROUNDUP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + size_t a, b = 0; + a = kmalloc_size_roundup(b); + ], [ + AC_DEFINE(HAVE_KMALLOC_SIZE_ROUNDUP, 1, + [kmalloc_size_roundup is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mm-release-pages.m4 b/drivers/gpu/drm/amd/dkms/m4/mm-release-pages.m4 new file mode 100644 index 0000000000000..7db093a925e04 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mm-release-pages.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit c6f92f9fbe7dbcc8903a67229aa88b4077ae4422 +dnl # mm: remove cold parameter for release_pages
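+dnl # +dnl # A sketch of the two prototypes this probe distinguishes (the older one carried a page-temperature hint): +dnl # +dnl # void release_pages(struct page **pages, int nr); /* after the commit */ +dnl # void release_pages(struct page **pages, int nr, bool cold); /* before */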
+dnl # +AC_DEFUN([AC_AMDGPU_MM_RELEASE_PAGES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + struct page **pages = NULL; + int nr = 0; + + release_pages(pages, nr); + ], [release_pages], [mm/swap.c], [ + AC_DEFINE(HAVE_MM_RELEASE_PAGES_2ARGS, 1, + [release_pages() wants 2 args]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mmap_assert_write_locked.m4 b/drivers/gpu/drm/amd/dkms/m4/mmap_assert_write_locked.m4 new file mode 100644 index 0000000000000..e79d2af6625ec --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mmap_assert_write_locked.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit 7dea19f9ee636cb244109a4dba426bbb3e5304b7 +dnl # mm: introduce memalloc_nofs_{save,restore} API +dnl # +AC_DEFUN([AC_AMDGPU_MMAP_ASSERT_WRITE_LOCKED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + mmap_assert_write_locked(NULL); + ], [ + AC_DEFINE(HAVE_MMAP_ASSERT_WRITE_LOCKED, 1, + [mmap_assert_write_locked() is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/mmput_async.m4 b/drivers/gpu/drm/amd/dkms/m4/mmput_async.m4 new file mode 100644 index 0000000000000..bdfb1256a9ccb --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mmput_async.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v4.14-rc3-117-ga1b2289cef92 android: binder: drop lru lock in isolate callback +dnl # v4.13-4372-g212925802454 mm: oom: let oom_reap_task and exit_mmap run concurrently +dnl # v4.6-6601-gec8d7c14ea14 mm, oom_reaper: do not mmput synchronously from the oom reaper context +dnl # +AC_DEFUN([AC_AMDGPU_MMPUT_ASYNC], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + mmput_async(NULL); + ],[mmput_async], [kernel/fork.c], [ + AC_DEFINE(HAVE_MMPUT_ASYNC, 1, [mmput_async() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-call-srcu.m4 b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-call-srcu.m4 new file mode 100644 index 0000000000000..8b1aad73065f7 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-call-srcu.m4 @@ -0,0 +1,18 @@ +dnl # commit b972216e27d1c853eced33f8638926636c606341 +dnl # mmu_notifier: add call_srcu and sync function +dnl # for listener to delay call and sync +dnl # +dnl # commit v5.3-rc5-63-gc96245148c1e +dnl # mm/mmu_notifiers: remove unregister_no_release +dnl # +AC_DEFUN([AC_AMDGPU_MMU_NOTIFIER_CALL_SRCU], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + mmu_notifier_call_srcu(NULL, NULL); + ],[ + AC_DEFINE(HAVE_MMU_NOTIFIER_CALL_SRCU, 1, [mmu_notifier_call_srcu() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-synchronize.m4 b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-synchronize.m4 new file mode 100644 index 0000000000000..a5e8dcde897ff --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier-synchronize.m4 @@ -0,0 +1,31 @@ +dnl # +dnl # commit v5.3-rc1-29-g2c7933f53f6b +dnl # mm/mmu_notifiers: add a get/put scheme for the registration +dnl # +dnl # amdkcl: mmu_notifier_put() & mmu_notifier_synchronize() is +dnl # introduced in the same commit, yet rhel7.7 has different behavior +dnl # +AC_DEFUN([AC_AMDGPU_MMU_NOTIFIER_PUT], [ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + mmu_notifier_put(NULL); + ],[ + AC_DEFINE(HAVE_MMU_NOTIFIER_PUT, 1, + [mmu_notifier_put() is available]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_MMU_NOTIFIER_SYNCHRONIZE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + mmu_notifier_synchronize(); + ],[ + AC_DEFINE(HAVE_MMU_NOTIFIER_SYNCHRONIZE, 1, + [mmu_notifier_synchronize() is 
available]) + ]) + AC_AMDGPU_MMU_NOTIFIER_PUT + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/mmu-notifier.m4 b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier.m4 new file mode 100644 index 0000000000000..06742541fd0d9 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/mmu-notifier.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 4a83bfe916f3d2100df5bc8389bd182a537ced3e +dnl # mm/mmu_notifier: helper to test if a range invalidation is blockable +dnl # +AC_DEFUN([AC_AMDGPU_MMU_NOTIFIER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + mmu_notifier_range_blockable(NULL); + ], [ + AC_DEFINE(HAVE_MMU_NOTIFIER_RANGE_BLOCKABLE, 1, + [mmu_notifier_range_blockable() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci-configure-extended-tags.m4 b/drivers/gpu/drm/amd/dkms/m4/pci-configure-extended-tags.m4 new file mode 100644 index 0000000000000..c46dfedbf7a34 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci-configure-extended-tags.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit 62ce94a7a5a54aac80975f5e6731707225d4077e +dnl # PCI: Mark Broadcom HT2100 Root Port Extended Tags as broken +dnl # +AC_DEFUN([AC_AMDGPU_PCI_CONFIGURE_EXTENDED_TAGS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct pci_host_bridge bridge; + bridge.no_ext_tags = 0; + ], [ + AC_DEFINE(HAVE_PCI_CONFIGURE_EXTENDED_TAGS, 1, + [PCI driver handles extended tags]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci-dev-id.m4 b/drivers/gpu/drm/amd/dkms/m4/pci-dev-id.m4 new file mode 100644 index 0000000000000..0138c0b995d3c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci-dev-id.m4 @@ -0,0 +1,38 @@ +dnl # +dnl # commit 4e544bac8267f65a0bf06aed1bde9964da4812ed +dnl # PCI: Add pci_dev_id() helper +dnl # +AC_DEFUN([AC_AMDGPU_PCI_DEV_ID], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pci_dev_id(NULL); + ], [ + AC_DEFINE(HAVE_PCI_DEV_ID, 1, + [pci_dev_id() is available]) + ]) + ]) +]) + +dnl # +dnl # commit: v6.6-rc1-1-gd427da2323b0 +dnl # PCI: Add pci_get_base_class() helper +dnl # +AC_DEFUN([AC_AMDGPU_PCI_GET_BASE_CLASS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + pci_get_base_class(0, NULL); + ], [pci_get_base_class], [drivers/pci/search.c], [ + AC_DEFINE(HAVE_PCI_GET_BASE_CLASS, 1, + [pci_get_base_class() is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_PCI], [ + AC_AMDGPU_PCI_DEV_ID + AC_AMDGPU_PCI_GET_BASE_CLASS +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci-driver-dev-groups.m4 b/drivers/gpu/drm/amd/dkms/m4/pci-driver-dev-groups.m4 new file mode 100644 index 0000000000000..dfb7bd92cade1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci-driver-dev-groups.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit ded13b9cfd595adb478a1e371d2282048bba1df5 +dnl # PCI: Add support for dev_groups to struct pci_driver +dnl # +AC_DEFUN([AC_AMDGPU_PCI_DRIVER_DEV_GROUPS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct pci_driver *pd = NULL; + pd->dev_groups = NULL; + ], [ + AC_DEFINE(HAVE_PCI_DRIVER_DEV_GROUPS, 1, [struct pci_driver has field dev_groups]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci-upstream-bridge.m4 b/drivers/gpu/drm/amd/dkms/m4/pci-upstream-bridge.m4 new file mode 100644 index 0000000000000..7d8e48fd14555 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci-upstream-bridge.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit c6bde215acfd637708142ae671843b6f0eadbc6d +dnl # Author: Bjorn Helgaas +dnl # Date: Wed Nov 6 10:11:48 2013 -0700 +dnl # PCI: Add 
pci_upstream_bridge() +dnl # +AC_DEFUN([AC_AMDGPU_PCI_UPSTREAM_BRIDGE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pci_upstream_bridge(NULL); + ], [ + AC_DEFINE(HAVE_PCI_UPSTREAM_BRIDGE, 1, + [pci_upstream_bridge() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci_pr3_present.m4 b/drivers/gpu/drm/amd/dkms/m4/pci_pr3_present.m4 new file mode 100644 index 0000000000000..e0fbc073caf06 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci_pr3_present.m4 @@ -0,0 +1,15 @@ +dnl # +dnl # v5.4-rc2-37-g52525b7a3cf8 +dnl # PCI: Add a helper to check Power Resource Requirements _PR3 existence +dnl # +AC_DEFUN([AC_AMDGPU_PCI_PR3_PRESENT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + pci_pr3_present(NULL); + ],[pci_pr3_present], [drivers/pci/pci.c], [ + AC_DEFINE(HAVE_PCI_PR3_PRESENT, 1, [pci_pr3_present() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pci_rebar_bytes_to_size.m4 b/drivers/gpu/drm/amd/dkms/m4/pci_rebar_bytes_to_size.m4 new file mode 100644 index 0000000000000..01d066282244a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pci_rebar_bytes_to_size.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 192f1bf7559e895d51f81c3976c5892c8b1e0601 +dnl # PCI: Add pci_rebar_bytes_to_size() +dnl # +AC_DEFUN([AC_AMDGPU_PCI_REBAR_BYTES_TO_SIZE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pci_rebar_bytes_to_size(0); + ], [ + AC_DEFINE(HAVE_PCI_REBAR_BYTES_TO_SIZE, 1, + [pci_rebar_bytes_to_size() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pcie-aspm-enabled.m4 b/drivers/gpu/drm/amd/dkms/m4/pcie-aspm-enabled.m4 new file mode 100644 index 0000000000000..32927a35d28f5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pcie-aspm-enabled.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.3-rc4-1-gaccd2dd72c8f +dnl # PCI/ASPM: Add pcie_aspm_enabled() +dnl # +AC_DEFUN([AC_AMDGPU_PCIE_ASPM_ENABLED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pcie_aspm_enabled(NULL); + ], [ + AC_DEFINE(HAVE_PCIE_ASPM_ENABLED, 1, + [pcie_aspm_enabled() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pid_type.m4 b/drivers/gpu/drm/amd/dkms/m4/pid_type.m4 new file mode 100644 index 0000000000000..da986da3833f3 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pid_type.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v4.18-rc1-6-g6883f81aac6f +dnl # pid: Implement PIDTYPE_TGID +dnl # +AC_DEFUN([AC_AMDGPU_PID_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + enum pid_type a; + a = PIDTYPE_TGID; + ], [ + AC_DEFINE(HAVE_PIDTYPE_TGID, 1, + [PIDTYPE_TGID is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pm-suspend-via-firmware.m4 b/drivers/gpu/drm/amd/dkms/m4/pm-suspend-via-firmware.m4 new file mode 100644 index 0000000000000..d5bcda40a4d71 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pm-suspend-via-firmware.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v4.3-rc5-6-gef25ba047601 +dnl # PM / sleep: Add flags to indicate platform firmware involvement +dnl # +AC_DEFUN([AC_AMDGPU_PM_SUSPEND_VIA_FIRMWARE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + pm_suspend_via_firmware(); + ],[ + AC_DEFINE(HAVE_PM_SUSPEND_VIA_FIRMWARE, + 1, + [pm_suspend_via_firmware() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pm_suspend_target_state.m4 b/drivers/gpu/drm/amd/dkms/m4/pm_suspend_target_state.m4 new file mode 100644 index 0000000000000..7f4394902241d --- /dev/null +++ 
b/drivers/gpu/drm/amd/dkms/m4/pm_suspend_target_state.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit edf3ad32f18b0ea7d27ea9420f3bb9b2c850b48b +dnl # drm/amd: Warn users about potential s0ix problems +dnl # +AC_DEFUN([AC_AMDGPU_PM_SUSPEND_TARGET_STATE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + pm_suspend_target_state = PM_SUSPEND_TO_IDLE; + ],[ + AC_DEFINE(HAVE_PM_SUSPEND_TARGET_STATE, + 1, + [pm_suspend_target_state is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/processor.m4 b/drivers/gpu/drm/amd/dkms/m4/processor.m4 new file mode 100644 index 0000000000000..66dececcd8989 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/processor.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # commit v6.6-rc1-4-gb9655e702dc5 +dnl # x86/cpu: Encapsulate topology information in cpuinfo_x86 +dnl # +AC_DEFUN([AC_AMDGPU_CPUINFO_X86], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct cpuinfo_x86* cpuinfo = NULL; + struct cpuinfo_topology topo; + topo = cpuinfo -> topo; + ],[ + AC_DEFINE(HAVE_CPUINFO_TOPOLOGY_IN_CPUINFO_X86_STRUCT, 1, + [ cpuinfo_x86.topo is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/pxm_to_node.m4 b/drivers/gpu/drm/amd/dkms/m4/pxm_to_node.m4 new file mode 100644 index 0000000000000..a69d9f2264f6c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/pxm_to_node.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.7-20-gf2af6d3978d7 +dnl # virtio-mem: Allow to specify an ACPI PXM as nid +dnl # +AC_DEFUN([AC_AMDGPU_PXM_TO_NODE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ],[ + pxm_to_node(0); + ],[pxm_to_node], [drivers/acpi/numa/srat.c], [ + AC_DEFINE(HAVE_PXM_TO_NODE, 1, + [pxm_to_node() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/radix-tree-iter-delete.m4 b/drivers/gpu/drm/amd/dkms/m4/radix-tree-iter-delete.m4 new file mode 100644 index 0000000000000..ed747fab8a781 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/radix-tree-iter-delete.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v4.10-rc5-380-g0ac398ef391b +dnl # radix-tree: Add radix_tree_iter_delete +dnl # +AC_DEFUN([AC_AMDGPU_RADIX_TREE_ITER_DELETE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + radix_tree_iter_delete(NULL,NULL,NULL); + ], [ + AC_DEFINE(HAVE_RADIX_TREE_ITER_DELETE, 1, + [radix_tree_iter_delete() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/rbtree.m4 b/drivers/gpu/drm/amd/dkms/m4/rbtree.m4 new file mode 100644 index 0000000000000..0a29c2b864323 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/rbtree.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # v5.11-20-g2d24dd5798d0 +dnl # rbtree: Add generic add and find helpers +dnl # +AC_DEFUN([AC_AMDGPU_RB_ADD_CACHED], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + rb_add_cached(NULL, NULL, NULL); + ],[ + AC_DEFINE(HAVE_RB_ADD_CACHED, 1, + [rb_add_cached is available]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/register_shrinker.m4 b/drivers/gpu/drm/amd/dkms/m4/register_shrinker.m4 new file mode 100644 index 0000000000000..98c49b53ddc6f --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/register_shrinker.m4 @@ -0,0 +1,41 @@ +dnl # +dnl # v5.16-rc1-22-g91f75eb481cf x86/MCE/AMD, EDAC/mce_amd: Support non-uniform MCA bank type enumeration +dnl # +AC_DEFUN([AC_AMDGPU_REGISTER_SHRINKER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + #include + ],[ + struct shrinker *a = NULL; + const char *b = NULL; + register_shrinker(a, b); + ],[ + 
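dnl # +dnl # The two-argument form adds a printf-style name for the shrinker; a sketch of the probed signature, believed to match mainline from around v6.0: +dnl # +dnl # int register_shrinker(struct shrinker *shrinker, +dnl # const char *fmt, ...); +dnl # +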
AC_DEFINE(HAVE_REGISTER_SHRINKER_WITH_TWO_ARGUMENTS, 1, + [whether register_shrinker(x, x) is available]) + ]) + ]) +]) + +dnl # +dnl # commit: v6.6-rc4-53-gc42d50aefd17 +dnl # mm: shrinker: add infrastructure for dynamically allocating shrinker +dnl # +AC_DEFUN([AC_AMDGPU_SHRINKER_REGISTER], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/shrinker.h> + ], [ + shrinker_register(NULL); + ], [shrinker_register], [mm/shrinker.c], [ + AC_DEFINE(HAVE_SHRINKER_REGISTER, 1, + [shrinker_register() is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_SHRINKER], [ + AC_AMDGPU_REGISTER_SHRINKER + AC_AMDGPU_SHRINKER_REGISTER +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/sched-list-for-each-entry.m4 b/drivers/gpu/drm/amd/dkms/m4/sched-list-for-each-entry.m4 new file mode 100644 index 0000000000000..4f993d9da6fa3 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/sched-list-for-each-entry.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # 4.13 API change +dnl # commit ac6424b981bce1c4bc55675c6ce11bfe1bbfa64f +dnl # Renamed wait_queue_head::task_list -> wait_queue_head::head +dnl # Renamed wait_queue_entry::task_list -> wait_queue_entry::entry +dnl # +AC_DEFUN([AC_AMDGPU_LIST_FOR_EACH_ENTRY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/wait.h> + ], [ + wait_queue_entry_t *wq_entry = NULL; + wait_queue_head_t *wq_head = NULL; + + __add_wait_queue(wq_head, wq_entry); + ], [ + AC_DEFINE(HAVE_WAIT_QUEUE_ENTRY, 1, + [wait_queue_entry_t exists]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/seq-hex-dump.m4 b/drivers/gpu/drm/amd/dkms/m4/seq-hex-dump.m4 new file mode 100644 index 0000000000000..5765baa40af2d --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/seq-hex-dump.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # commit 37607102c4426cf92aeb5da1b1d9a79ba6d95e3f +dnl # seq_file: provide an analogue of print_hex_dump() +dnl # +AC_DEFUN([AC_AMDGPU_SEQ_HEX_DUMP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/seq_file.h> + ], [ + seq_hex_dump(NULL,NULL,0,0,0,NULL,0,0); + ], [seq_hex_dump],[fs/seq_file.c], [ + AC_DEFINE(HAVE_SEQ_HEX_DUMP, 1, + [seq_hex_dump() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/smca_get_bank_type.m4 b/drivers/gpu/drm/amd/dkms/m4/smca_get_bank_type.m4 new file mode 100644 index 0000000000000..4dbfe78524f84 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/smca_get_bank_type.m4 @@ -0,0 +1,49 @@ +dnl # +dnl # v5.16-rc1-22-g91f75eb481cf x86/MCE/AMD, EDAC/mce_amd: Support non-uniform MCA bank type enumeration +dnl # +AC_DEFUN([AC_AMDGPU_SMCA_GET_BANK_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + unsigned int a = 0, b = 0; + enum smca_bank_types bank_type; + bank_type = smca_get_bank_type(a, b); + ],[ + AC_DEFINE(HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS, 1, + [whether smca_get_bank_type(x, x) is available]) + ],[ + dnl # + dnl # v5.15-rc2-452-gf38ce910d8df x86/MCE/AMD: Export smca_get_bank_type symbol + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + unsigned int a = 0; + enum smca_bank_types bank_type; + bank_type = smca_get_bank_type(a); + ],[ + AC_DEFINE(HAVE_SMCA_GET_BANK_TYPE_WITH_ONE_ARGUMENT, 1, + [smca_get_bank_type(x) is available]) + ],[ + dnl # + dnl # v4.9-rc4-4-g79349f529ab1 x86/RAS: Simplify SMCA bank descriptor struct + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + struct smca_bank *b = NULL; + b->id = 0; + ], [ + AC_DEFINE(HAVE_STRUCT_SMCA_BANK, 1, + [struct smca_bank is available]) + ]) + + ]) + ]) + + ]) +])
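The two shrinker probes above feed an either/or choice at the registration site: kernels with the dynamic shrinker infrastructure allocate the shrinker with shrinker_alloc()/shrinker_register(), while older kernels register a caller-owned struct shrinker, with or without a name argument. A hedged sketch of the usual selection pattern (the kcl_* names and the "amdgpu_kcl" label are illustrative, not taken from this patch):

#include <linux/shrinker.h>

/* Illustrative stubs; a real implementation would walk an LRU. */
static unsigned long kcl_count(struct shrinker *s, struct shrink_control *sc)
{
	return 0;	/* nothing reclaimable in this stub */
}

static unsigned long kcl_scan(struct shrinker *s, struct shrink_control *sc)
{
	return SHRINK_STOP;
}

#ifndef HAVE_SHRINKER_REGISTER
static struct shrinker kcl_shrinker = {
	.count_objects	= kcl_count,
	.scan_objects	= kcl_scan,
	.seeks		= DEFAULT_SEEKS,
};
#endif

static int kcl_shrinker_init(void)
{
#ifdef HAVE_SHRINKER_REGISTER
	/* dynamic API probed by AC_AMDGPU_SHRINKER_REGISTER */
	struct shrinker *s = shrinker_alloc(0, "amdgpu_kcl");

	if (!s)
		return -ENOMEM;
	s->count_objects = kcl_count;
	s->scan_objects = kcl_scan;
	shrinker_register(s);
	return 0;
#elif defined(HAVE_REGISTER_SHRINKER_WITH_TWO_ARGUMENTS)
	return register_shrinker(&kcl_shrinker, "amdgpu_kcl"); /* named variant */
#else
	return register_shrinker(&kcl_shrinker); /* legacy single-arg form */
#endif
}

diff --git a/drivers/gpu/drm/amd/dkms/m4/str_yes_no.m4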
b/drivers/gpu/drm/amd/dkms/m4/str_yes_no.m4 new file mode 100644 index 0000000000000..5aefabf94021b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/str_yes_no.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit ea4692c75e1c63926e4fb0728f5775ef0d733888 +dnl # lib/string_helpers: Consolidate string helpers implementation +dnl # +AC_DEFUN([AC_AMDGPU_STR_YES_NO], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + const char *str; + str = str_yes_no(true); + ], [ + AC_DEFINE(HAVE_STR_YES_NO, 1, + [str_yes_no() is defined]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_attribute_group.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_attribute_group.m4 new file mode 100644 index 0000000000000..80990947459d3 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_attribute_group.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v4.3-rc4-9-g7f5028cf6190 +dnl # sysfs: Support is_visible() on binary attributes +dnl # +AC_DEFUN([AC_AMDGPU_ATTRIBUTE_GROUP_IS_BIN_VISIBLE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct attribute_group *amdgpu_attr_group = NULL; + amdgpu_attr_group->is_bin_visible = NULL; + ],[ + AC_DEFINE(HAVE_ATTRIBUTE_GROUP_IS_BIN_VISIBLE, 1, + [amdgpu_attr_group->is_bin_visible is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_color_ctm_3x4.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_color_ctm_3x4.m4 new file mode 100644 index 0000000000000..cef143831f813 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_color_ctm_3x4.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # v6.5-2548-g2d4457c2d03e +dnl # drm/amd/display: Add 3x4 CTM support for plane CTM +dnl # +AC_DEFUN([AC_AMDGPU_DRM_COLOR_CTM_3X4], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_color_ctm_3x4 *ctm = NULL; + ctm->matrix[0] = 0; + ],[ + AC_DEFINE(HAVE_DRM_COLOR_CTM_3X4, 1, + [struct drm_color_ctm_3x4 is available]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_connector_state.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_connector_state.m4 new file mode 100644 index 0000000000000..845426d6fe7bf --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_connector_state.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # commit v5.0-rc7-1020-gd2c6a405846c +dnl # drm: Add HDMI colorspace property +dnl # +AC_DEFUN([AC_AMDGPU_DRM_CONNECTOR_STATE_COLORSPACE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_connector_state *connector_state = NULL; + connector_state->colorspace = 0; + ],[ + AC_DEFINE(HAVE_DRM_CONNECTOR_STATE_COLORSPACE, 1, + [drm_connector_state->colorspace is available]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CONNECTOR_STATE], [ + AC_AMDGPU_DRM_CONNECTOR_STATE_COLORSPACE +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_crtc_funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_crtc_funcs.m4 new file mode 100644 index 0000000000000..6b53674fb88bc --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_crtc_funcs.m4 @@ -0,0 +1,39 @@ +dnl # +dnl # v5.5-rc2-1557-ge3eff4b5d91e drm/amdgpu: Convert to CRTC VBLANK callbacks +dnl # v5.5-rc2-1556-gea702333e567 drm/amdgpu: Convert to struct drm_crtc_helper_funcs.get_scanout_position() +dnl # v5.5-rc2-1555-g7fe3f0d15aac drm: Add get_vblank_timestamp() to struct drm_crtc_funcs +dnl # v5.5-rc2-1554-gf1e2b6371c12 drm: Add get_scanout_position() to struct drm_crtc_helper_funcs +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP], [ + 
AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_crtc_funcs *ptr = NULL; + ptr->get_vblank_timestamp(NULL, NULL, NULL, 0); + ],[ + AC_DEFINE(HAVE_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP, + 1, + [struct drm_crtc_funcs->get_vblank_timestamp() is available]) + ]) + ]) +]) + +dnl # +dnl # v5.10-1961-g6ca2ab8086af drm: automatic legacy gamma support +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL], [ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + drm_atomic_helper_legacy_gamma_set(NULL, NULL, NULL, NULL, 0, NULL); + ], [drm_atomic_helper_legacy_gamma_set], [drivers/gpu/drm/drm_atomic_helper.c],[],[ + AC_DEFINE(HAVE_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL, 1, + [HAVE_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL is available]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS], [ + AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS_GET_VBLANK_TIMESTAMP + AC_AMDGPU_STRUCT_DRM_CRTC_FUNCS_GAMMA_SET_OPTIONAL +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_device.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_device.m4 new file mode 100644 index 0000000000000..929e3edc5f603 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_device.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # commit v5.5-rc2-1419-g7e13ad896484 +dnl # drm: Avoid drm_global_mutex for simple inc/dec of dev->open_count +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DEVICE_OPEN_COUNT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_device *ddev = NULL; + ddev->open_count = 0; + ],[ + AC_DEFINE(HAVE_DRM_DEVICE_OPEN_COUNT_INT, 1, + [drm_device->open_count is int]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_DEVICE], [ + AC_AMDGPU_DRM_DEVICE_OPEN_COUNT +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_dp_mst_branch.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_dp_mst_branch.m4 new file mode 100644 index 0000000000000..753914f3cc392 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_dp_mst_branch.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # v5.6-rc7-127-gd01cd62400b3 +dnl # uuid: Add inline helpers to import / export UUIDs +dnl # +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_BRANCH_GUID_T], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + struct drm_dp_mst_branch mst_primary; + const guid_t guid; + guid_copy(&mst_primary.guid, &guid); + ],[ + AC_DEFINE(HAVE_DRM_DP_MST_BRANCH_GUID_T, 1, + [the guid of struct drm_dp_mst_branch is guid_t]) + ]) + ]) +]) + + + diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_drm_plane_helper_funcs.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_drm_plane_helper_funcs.m4 new file mode 100644 index 0000000000000..495dd9ef97c12 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_drm_plane_helper_funcs.m4 @@ -0,0 +1,19 @@ +dnl # commit v5.11-rc2-701-g7c11b99a8e58 +dnl # drm/atomic: Pass the full state to planes atomic_check +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct drm_plane_helper_funcs *funcs = NULL; + funcs->atomic_check(NULL, (struct drm_atomic_state *)NULL); + ], [ + AC_DEFINE(HAVE_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS, 1, + [drm_plane_helper_funcs->atomic_check() second param wants drm_atomic_state arg]) + ]) + ]) +]) + +AC_DEFUN([AC_AMDGPU_STRUCT_DRM_PLANE_HELPER_FUNCS], [ + AC_AMDGPU_STRUCT_DRM_PLANE_HELPER_FUNCS_ATOMIC_CHECK_DRM_ATOMIC_STATE_PARAMS +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/struct_kobj_type.m4 b/drivers/gpu/drm/amd/dkms/m4/struct_kobj_type.m4 new file 
mode 100644 index 0000000000000..a698f80362f2c --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/struct_kobj_type.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit aa30f47cf666111f6bbfd15f290a27e8a7b9d854 +dnl # kobject: Add support for default attribute groups to kobj_type +dnl # + +AC_DEFUN([AC_AMDGPU_STRUCT_KOBJ_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/kobject.h> + ],[ + struct kobj_type *k_type = NULL; + k_type->default_groups = NULL; + ],[ + AC_DEFINE(HAVE_DEFAULT_GROUP_IN_KOBJ_TYPE, 1, + [kobj_type->default_groups is available]) + ],[ + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/svm.m4 b/drivers/gpu/drm/amd/dkms/m4/svm.m4 new file mode 100644 index 0000000000000..17657770988f5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/svm.m4 @@ -0,0 +1,36 @@ +dnl # +dnl # v5.8-rc4-7-g5143192cd410 mm/migrate: add a flags parameter to migrate_vma +dnl # +AC_DEFUN([AC_AMDGPU_MIGRATE_VMA_PGMAP_OWNER], [ + AC_KERNEL_TRY_COMPILE([ + #include <linux/migrate.h> + ], [ + struct migrate_vma *migrate = NULL; + migrate->pgmap_owner = NULL; + ], [ + AC_DEFINE(HAVE_MIGRATE_VMA_PGMAP_OWNER, 1, + [migrate_vma->pgmap_owner is available]) + ]) +]) + +dnl # +dnl # v5.6-rc3-15-g800bb1c8dc80 mm: handle multiple owners of device private pages in migrate_vma +dnl # v5.6-rc3-14-gf894ddd5ff01 memremap: add an owner field to struct dev_pagemap +dnl # +AC_DEFUN([AC_AMDGPU_HSA_AMD_SVM], [ + AC_KERNEL_TRY_COMPILE([ + #include <linux/memremap.h> + #if !IS_ENABLED(CONFIG_DEVICE_PRIVATE) + #error "DEVICE_PRIVATE is a must for svm support" + #endif + ], [ + struct dev_pagemap *pm = NULL; + pm->owner = NULL; + ], [ + AC_DEFINE(HAVE_DEV_PAGEMAP_OWNER, 1, + [dev_pagemap->owner is available]) + + AC_AMDGPU_MIGRATE_VMA_PGMAP_OWNER + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/synchronize-shrinkers.m4 b/drivers/gpu/drm/amd/dkms/m4/synchronize-shrinkers.m4 new file mode 100644 index 0000000000000..9429112a66d2b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/synchronize-shrinkers.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.14-rc3-760-g880121be1179 +dnl # mm/vmscan: add sync_shrinkers function v3 +dnl # +AC_DEFUN([AC_AMDGPU_SYNCHRONIZE_SHRINKERS], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + #include + ], [ + synchronize_shrinkers(); + ], [synchronize_shrinkers], [mm/vmscan.c], [ + AC_DEFINE(HAVE_SYNCHRONIZE_SHRINKERS, 1, + [synchronize_shrinkers() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/sysfs_emit.m4 b/drivers/gpu/drm/amd/dkms/m4/sysfs_emit.m4 new file mode 100644 index 0000000000000..7c355cae6ed01 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/sysfs_emit.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit: v5.9-rc5-23-g2efc459d06f1 +dnl # sysfs: Add sysfs_emit and sysfs_emit_at +dnl # to format sysfs output +AC_DEFUN([AC_AMDGPU_SYSFS_EMIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include <linux/sysfs.h> + ],[ + sysfs_emit(NULL, NULL); + sysfs_emit_at(NULL, 0, NULL); + ],[sysfs_emit sysfs_emit_at],[fs/sysfs/file.c], [ + AC_DEFINE(HAVE_SYSFS_EMIT, 1, + [sysfs_emit() and sysfs_emit_at() are available]) + ]) + ]) +])
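When the sysfs_emit probe fails, out-of-tree builds commonly backfill the two helpers on top of scnprintf(), relying on the fact that sysfs show() buffers are exactly one page. A minimal sketch of such a shim, an assumption about how the compatibility layer fills the gap rather than code lifted from this patch:

#include <linux/kernel.h>	/* scnprintf() */
#include <linux/mm.h>		/* PAGE_SIZE */

#ifndef HAVE_SYSFS_EMIT
/* Loose stand-ins: clamp output to one page like sysfs_emit(), but
 * without the alignment sanity checks the real helpers perform. */
#define sysfs_emit(buf, fmt, ...) \
	scnprintf(buf, PAGE_SIZE, fmt, ##__VA_ARGS__)
#define sysfs_emit_at(buf, at, fmt, ...) \
	scnprintf((buf) + (at), PAGE_SIZE - (at), fmt, ##__VA_ARGS__)
#endif

diff --git a/drivers/gpu/drm/amd/dkms/m4/totalram_pages.m4 b/drivers/gpu/drm/amd/dkms/m4/totalram_pages.m4 new file mode 100644 index 0000000000000..f57daa513df71 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/totalram_pages.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v4.20-6506-gca79b0c211af +dnl # mm: convert totalram_pages and totalhigh_pages variables to atomic +dnl # +AC_DEFUN([AC_AMDGPU_TOTALRAM_PAGES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/mm.h> + ], [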
unsigned long ret; + ret = totalram_pages(); + ], [ + AC_DEFINE(HAVE_TOTALRAM_PAGES, 1, + [totalram_pages() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/ttm_buffer_object.m4 b/drivers/gpu/drm/amd/dkms/m4/ttm_buffer_object.m4 new file mode 100644 index 0000000000000..ed0b163902c11 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ttm_buffer_object.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # v5.3-rc1-374-ge7f0141a217f drm/ttm: drop ttm_buffer_object->resv +dnl # v5.3-rc1-370-g5a5011a72489 drm/amdgpu: switch driver from bo->resv to bo->base.resv +dnl # v5.3-rc1-367-ge532a135d704 drm/ttm: switch ttm core from bo->resv to bo->base.resv +dnl # v5.3-rc1-365-gb96f3e7c8069 drm/ttm: use gem vma_node +dnl # v5.3-rc1-364-g1e053b10ba60 drm/ttm: use gem reservation object +dnl # v5.3-rc1-362-gc105de2828e1 drm/amdgpu: use embedded gem object +dnl # v5.3-rc1-358-g8eb8833e7ed3 drm/ttm: add gem base object +dnl # v5.0-rc1-1004-g1ba627148ef5 drm: Add reservation_object to drm_gem_object +dnl # +AC_DEFUN([AC_AMDGPU_TTM_BUFFER_OBJECT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <drm/drm_gem.h> + ], [ + struct drm_gem_object *gem_obj = NULL; + gem_obj->resv = &gem_obj->_resv; + ], [ + AC_DEFINE(HAVE_DRM_GEM_OBJECT_RESV, 1, + [drm_gem_object->resv/_resv is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/type--poll-t.m4 b/drivers/gpu/drm/amd/dkms/m4/type--poll-t.m4 new file mode 100644 index 0000000000000..3ac6f9d5a31ea --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/type--poll-t.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # commit v4.15-rc1-4-g8ced390c2b18 +dnl # define __poll_t, annotate constants +dnl # +AC_DEFUN([AC_AMDGPU_TYPE__POLL_T], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/types.h> + ],[ + __poll_t mask; + mask = 0; + ],[ + AC_DEFINE(HAVE_TYPE__POLL_T, 1, [__poll_t is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/vga-client-register.m4 b/drivers/gpu/drm/amd/dkms/m4/vga-client-register.m4 new file mode 100644 index 0000000000000..603da40bd05db --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vga-client-register.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # v5.13-rc3-1630-gbf44e8cecc03 +dnl # vgaarb: don't pass a cookie to vga_client_register +dnl # +AC_DEFUN([AC_AMDGPU_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/vgaarb.h> + struct pci_dev; + ], [ + unsigned int (*callback)(struct pci_dev *, bool) = NULL; + vga_client_register(NULL, callback); + ], [ + AC_DEFINE(HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE, 1, + [vga_client_register() doesn't pass a cookie]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/vga_remove_vgacon.m4 b/drivers/gpu/drm/amd/dkms/m4/vga_remove_vgacon.m4 new file mode 100644 index 0000000000000..f95a903f3143b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vga_remove_vgacon.m4 @@ -0,0 +1,16 @@ +dnl # +dnl # v5.0-rc1-998-gc6b38fbbde91 +dnl # drm: move i915_kick_out_vgacon to vgaarb +dnl # +AC_DEFUN([AC_AMDGPU_VGA_REMOVE_VGACON], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include <linux/vgaarb.h> + ], [ + vga_remove_vgacon(NULL); + ], [ + AC_DEFINE(HAVE_VGA_REMOVE_VGACON, 1, + [vga_remove_vgacon() is available]) + ]) + ]) +])
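The HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE result then selects between the cookie-less vga_client_register() prototype and the older four-argument form that threaded an opaque cookie through the decode callback. A hedged sketch of that selection (the kcl_* names are illustrative, and the pre-v5.14 signature shown is an assumption based on the commit referenced above):

#include <linux/vgaarb.h>

/* Decode callback for the modern, cookie-less prototype. */
static unsigned int kcl_vga_set_decode(struct pci_dev *pdev, bool state)
{
	return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

#ifndef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE
/* Older kernels hand the registered cookie back instead of the pdev. */
static unsigned int kcl_vga_set_decode_cookie(void *cookie, bool state)
{
	return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}
#endif

static int kcl_vga_register(struct pci_dev *pdev, void *priv)
{
#ifdef HAVE_VGA_CLIENT_REGISTER_NOT_PASS_COOKIE
	return vga_client_register(pdev, kcl_vga_set_decode);
#else
	return vga_client_register(pdev, priv, NULL,
				   kcl_vga_set_decode_cookie);
#endif
}

diff --git a/drivers/gpu/drm/amd/dkms/m4/vm_operations_struct.m4 b/drivers/gpu/drm/amd/dkms/m4/vm_operations_struct.m4 new file mode 100644 index 0000000000000..111d006f1ac28 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vm_operations_struct.m4 @@ -0,0 +1,34 @@ +dnl # +dnl # commit v4.10-9602-g11bac8000449 +dnl # mm, fs: reduce fault, page_mkwrite, and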
pfn_mkwrite to take only vmf +dnl # +AC_DEFUN([AC_AMDGPU_VM_OPERATIONS_STRUCT_FAULT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct vm_operations_struct *vm_ops = NULL; + vm_ops->fault(NULL); + ], [ + AC_DEFINE(HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG, 1, + [vm_operations_struct->fault() wants 1 arg]) + AC_DEFINE(HAVE_VM_FAULT_ADDRESS_VMA, 1, + [vm_fault->{address/vma} is available]) + ], [ + dnl # + dnl # commit v4.9-7746-g82b0f8c39a38 + dnl # mm: join struct fault_env and vm_fault + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct vm_fault *ptest = NULL; + ptest->address = 0; + ptest->vma = NULL; + ], [ + AC_DEFINE(HAVE_VM_FAULT_ADDRESS_VMA, 1, + [vm_fault->{address/vma} is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/vma-lookup.m4 b/drivers/gpu/drm/amd/dkms/m4/vma-lookup.m4 new file mode 100644 index 0000000000000..537ee8180393a --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vma-lookup.m4 @@ -0,0 +1,53 @@ +dnl # +dnl # v5.13-105-gce6d42f2e4a2 +dnl # mm: add vma_lookup(), update find_vma_intersection() comments +dnl # +AC_DEFUN([AC_AMDGPU_VMA_LOOKUP], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + vma_lookup(NULL, 0); + ], [ + AC_DEFINE(HAVE_VMA_LOOKUP, 1, + [vma_lookup() is available]) + ]) + ]) +]) + +dnl # +dnl # v6.2-rc4-446-gbc292ab00f6c +dnl # mm: introduce vma->vm_flags wrapper functions +dnl # +AC_DEFUN([AC_AMDGPU_VM_FLAGS_SET], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + vm_flags_set(NULL, 0); + vm_flags_clear(NULL, 0); + ], [ + AC_DEFINE(HAVE_VM_FLAGS_SET, 1, + [vm_flags_{set, clear} is available]) + ]) + ]) +]) + +dnl # +dnl # v6.5-rc4-265-g11250fd12eb8 +dnl # mm: factor out VMA stack and heap checks +dnl # +AC_DEFUN([AC_AMDGPU_VMA_IS_INITIAL], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + vma_is_initial_heap(NULL); + vma_is_initial_stack(NULL); + ], [ + AC_DEFINE(HAVE_VMA_IS_INITIAL_HEAP, 1, + [vma_is_initial_{heap, stack} is available]) + ]) + ]) +]) + diff --git a/drivers/gpu/drm/amd/dkms/m4/vmf-insert.m4 b/drivers/gpu/drm/amd/dkms/m4/vmf-insert.m4 new file mode 100644 index 0000000000000..89789fd059839 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vmf-insert.m4 @@ -0,0 +1,45 @@ +dnl # +dnl # commit v4.4-6466-g34c0fd540e79 +dnl # mm, dax, pmem: introduce pfn_t +dnl # +AC_DEFUN([AC_AMDGPU_VMF_INSERT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pfn_t pfn; + pfn.val = 0; + ], [ + dnl # + dnl # commit v4.16-7358-g1c8f422059ae + dnl # mm: change return type to vm_fault_t + dnl # + AC_DEFINE(HAVE_PFN_T, 1, [pfn_t is defined]) + + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + pfn_t pfn = {}; + vmf_insert_mixed(NULL, 0, pfn); + vmf_insert_pfn(NULL, 0, 0); + ], [ + AC_DEFINE(HAVE_VMF_INSERT, 1, + [vmf_insert_*() are available]) + ], [ + dnl # + dnl # commit v4.4-6475-g01c8f1c44b83 + dnl # mm, dax, gpu: convert vm_insert_mixed to pfn_t + dnl # + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + pfn_t pfn = {}; + vm_insert_mixed(NULL, 0, pfn); + ], [vm_insert_mixed], [mm/memory.c], [ + AC_DEFINE(HAVE_PFN_T_VM_INSERT_MIXED, 1, + [vm_insert_mixed() wants pfn_t arg]) + ]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/vmf_insert_mixed_prot.m4 b/drivers/gpu/drm/amd/dkms/m4/vmf_insert_mixed_prot.m4 new file mode 100644 index 0000000000000..53da9747196ea --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vmf_insert_mixed_prot.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # 5379e4dd3220 mm, drm/ttm: Fix 
vm page protection handling +dnl # 574c5b3d0e4c mm: Add a vmf_insert_mixed_prot() function +dnl # +AC_DEFUN([AC_AMDGPU_VMF_INSERT_MIXED_PROT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + #include + ],[ + pfn_t pfn; + pgprot_t prot; + vmf_insert_mixed_prot(NULL, 0, pfn, prot); + ],[vmf_insert_mixed_prot],[mm/memory.c],[ + AC_DEFINE(HAVE_VMF_INSERT_MIXED_PROT, + 1, + [vmf_insert_mixed_prot() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/vmf_insert_pfn_prot.m4 b/drivers/gpu/drm/amd/dkms/m4/vmf_insert_pfn_prot.m4 new file mode 100644 index 0000000000000..192bb1767dda5 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/vmf_insert_pfn_prot.m4 @@ -0,0 +1,35 @@ +dnl # +dnl # commit v4.19-6927-gf5e6d1d5f8f3 +dnl # mm: introduce vmf_insert_pfn_prot() +dnl # +AC_DEFUN([AC_AMDGPU_VMF_INSERT_PFN_PROT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + pgprot_t prot = {0}; + vmf_insert_pfn_prot(NULL, 0, 0, prot); + ],[ + AC_DEFINE(HAVE_VMF_INSERT_PFN_PROT, + 1, + [vmf_insert_pfn_prot() is available]) + ],[ + dnl # + dnl # commit v4.4-528-g1745cbc5d0de + dnl # mm: Add vm_insert_pfn_prot() + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + #include + ],[ + pgprot_t prot = {0}; + vm_insert_pfn_prot(NULL, 0, 0, prot); + ],[ + AC_DEFINE(HAVE_VM_INSERT_PFN_PROT, + 1, + [vm_insert_pfn_prot() is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/want_init_on_free.m4 b/drivers/gpu/drm/amd/dkms/m4/want_init_on_free.m4 new file mode 100644 index 0000000000000..47463793e94b1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/want_init_on_free.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.2-5754-g6471384af2a6 +dnl # mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options +dnl # +AC_DEFUN([AC_AMDGPU_WANT_INIT_ON_FREE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + bool r; + r = want_init_on_free(); + ], [ + AC_DEFINE(HAVE_WANT_INIT_ON_FREE, 1, + [want_init_on_free() is available]) + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/ww_mutex_trylock.m4 b/drivers/gpu/drm/amd/dkms/m4/ww_mutex_trylock.m4 new file mode 100644 index 0000000000000..9d87289b3bfc8 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/ww_mutex_trylock.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v5.15-rc1-1-g12235da8c80a +dnl # kernel/locking: Add context to ww_mutex_trylock() +dnl # +AC_DEFUN([AC_AMDGPU_WW_MUTEX_TRYLOCK_CONTEXT_ARG], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + int r; + r = ww_mutex_trylock(NULL, NULL); + ], [ + AC_DEFINE(HAVE_WW_MUTEX_TRYLOCK_CONTEXT_ARG, 1, + [ww_mutex_trylock() has context arg]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/x86_hypervisor_type.m4 b/drivers/gpu/drm/amd/dkms/m4/x86_hypervisor_type.m4 new file mode 100644 index 0000000000000..677a6050d5745 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/x86_hypervisor_type.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # commit: 03b2a320b19f1424e9ac9c21696be9c60b6d0d93 +dnl # x86/virt: Add enum for hypervisors to replace x86_hyper +dnl # +AC_DEFUN([AC_AMDGPU_X86_HYPERVISOR_TYPE], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + #include + ], [ + enum x86_hypervisor_type test; + test = X86_HYPER_NATIVE; + ], [ + AC_DEFINE(HAVE_X86_HYPERVISOR_TYPE, 1, + [enum x86_hypervisor_type is available]) + ], [ + ]) + ]) +]) \ No newline at end of file diff --git a/drivers/gpu/drm/amd/dkms/m4/xarray.m4 b/drivers/gpu/drm/amd/dkms/m4/xarray.m4 new file mode 100644 index 
0000000000000..bfe64c548f1c1 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/xarray.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v4.19-rc5-244-gf8d5d0cc145c +dnl # xarray: Add definition of struct xarray +dnl # +AC_DEFUN([AC_AMDGPU_STRUCT_XARRAY], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct xarray x; + xa_init(&x); + ], [ + AC_DEFINE(HAVE_STRUCT_XARRAY, 1, + [struct xarray is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/zone-managed-pages.m4 b/drivers/gpu/drm/amd/dkms/m4/zone-managed-pages.m4 new file mode 100644 index 0000000000000..a1228bf4f67ac --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/zone-managed-pages.m4 @@ -0,0 +1,32 @@ +dnl # +dnl # commit v4.20-6505-g9705bea5f833 +dnl # Author: Arun KS +dnl # Date: Fri Dec 28 00:34:24 2018 -0800 +dnl # mm: convert zone->managed_pages to atomic variable +dnl # +AC_DEFUN([AC_AMDGPU_ZONE_MANAGED_PAGES], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #include + ],[ + zone_managed_pages(NULL); + ],[ + AC_DEFINE(HAVE_ZONE_MANAGED_PAGES, 1, + [zone_managed_pages() is available]) + ],[ + dnl # + dnl # commit v3.7-4152-g9feedc9d831e + dnl # mm: introduce new field "managed_pages" to struct zone + dnl # + AC_KERNEL_TRY_COMPILE([ + #include + ], [ + struct zone *z = NULL; + z->managed_pages = 0; + ], [ + AC_DEFINE(HAVE_STRUCT_ZONE_MANAGED_PAGES, 1, + [zone->managed_pages is available]) + ]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/zone_device_page_init.m4 b/drivers/gpu/drm/amd/dkms/m4/zone_device_page_init.m4 new file mode 100644 index 0000000000000..56eaeb4b6d888 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/zone_device_page_init.m4 @@ -0,0 +1,17 @@ +dnl # +dnl # v6.0-rc3-597-g0dc45ca1ce18 mm/memremap.c: take a pgmap reference on page allocation +dnl # v6.0-rc3-596-gef233450898f mm: free device private pages have zero refcount +dnl # v5.17-rc4-75-g27674ef6c73f mm: remove the extra ZONE_DEVICE struct page refcount +dnl # +AC_DEFUN([AC_AMDGPU_ZONE_DEVICE_PAGE_INIT], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE_SYMBOL([ + #include + ], [ + zone_device_page_init(NULL); + ], [zone_device_page_init], [mm/memremap.c], [ + AC_DEFINE(HAVE_ZONE_DEVICE_PAGE_INIT, 1, + [zone_device_page_init() is available]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/oot/Makefile.oot b/drivers/gpu/drm/amd/dkms/oot/Makefile.oot new file mode 100644 index 0000000000000..5c8c78df4e932 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/oot/Makefile.oot @@ -0,0 +1,41 @@ +ifneq ($(KERNELRELEASE),) +include $(src)/amd/dkms/Makefile +else +KERNELVER := $(shell uname -r) +kernel_build_dir := /lib/modules/$(KERNELVER)/build +PACKAGE_NAME := $(shell sed -n '/PACKAGE_NAME/s|.*=||p' amd/dkms/dkms.conf) +PACKAGE_VERSION := $(shell sed -n '/PACKAGE_VERSION/s|.*=||p' amd/dkms/dkms.conf) +module_src_dir := $(CURDIR) +module_build_dir := $(shell mktemp -ut amd.XXXXXXXX) +module_build_flags := +num_cpu_cores := $(shell nproc) +Q := @ + +ifeq ($(wildcard $(kernel_build_dir)/include/config/auto.conf),) +$(error "invalid kernel obj dir, is kernel-devel installed?") +endif + +.PHONY: modules pre-build + +include $(kernel_build_dir)/include/config/auto.conf + +ifneq ($(CONFIG_CC_IS_CLANG),) +module_build_flags += CC=clang +endif +ifneq ($(CONFIG_LD_IS_LLD),) +module_build_flags += LD=ld.lld +endif + +modules:pre-build + $(Q)make -j$(num_cpu_cores) KERNELRELEASE=$(KERNELVER) \ + TTM_NAME=amdttm \ + SCHED_NAME=amd-sched \ + -C $(kernel_build_dir) \ + M=$(module_build_dir) $(module_build_flags) + $(Q)unlink 
$(module_build_dir) + +pre-build: + $(Q)cp -f amd/dkms/oot/pre-build.sh amd/dkms + $(Q)amd/dkms/pre-build.sh $(KERNELVER) $(module_src_dir) $(PACKAGE_NAME) $(PACKAGE_VERSION) $(module_build_dir) + +endif diff --git a/drivers/gpu/drm/amd/dkms/oot/kmod-amdgpu.spec b/drivers/gpu/drm/amd/dkms/oot/kmod-amdgpu.spec new file mode 100644 index 0000000000000..dd92860353f46 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/oot/kmod-amdgpu.spec @@ -0,0 +1,96 @@ +%global pkg amdgpu +%global kernel kernel version +%define pkg_version 6.8.7 +%define osdb_version 1798298 +%define anolis_release 1 + +%global debug_package %{nil} + +Name: kmod-%{pkg} +Version: %(echo %{kernel} | sed -E 's/-/~/g; s/\.(an|al)[0-9]+$//g') +Release: %{pkg_version}_%{osdb_version}~%{anolis_release}%{?dist} +Summary: The amdgpu Linux kernel driver + +License: GPLv2 and Redistributable, no modification permitted +URL: http://www.amd.com/ +Source0: kmod-%{pkg}-%{pkg_version}.tar.gz + +BuildRequires: gcc +BuildRequires: make +Requires: kernel >= %{kernel} + +%description +The AMD display driver kernel module in DKMS format for AMD graphics S/W + +%prep +%autosetup -n kmod-%{pkg}-%{pkg_version} -p1 + +%build +pushd src +%{__make} -f amd/dkms/oot/Makefile.oot KERNELVER=%(uname -r) +popd + +%install +mkdir -p %{buildroot}/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu +%{__install} -D -t %{buildroot}/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu src/amddrm_buddy.ko src/amddrm_ttm_helper.ko src/scheduler/amd-sched.ko src/ttm/amdttm.ko src/amd/amdxcp/amdxcp.ko src/amd/amdgpu/amdgpu.ko src/amd/amdkcl/amdkcl.ko + +# Make .ko objects temporarily executable for automatic stripping +find %{buildroot}/lib/modules -type f -name \*.ko -exec chmod u+x \{\} \+ + +# Generate depmod.conf +%{__install} -d %{buildroot}/%{_sysconfdir}/depmod.d/ +for kmod in $(find %{buildroot}/lib/modules/%{kernel}.%{_arch}/extra -type f -name \*.ko -printf "%%P\n" | sort) +do + echo "override $(basename $kmod .ko) * weak-updates/$(dirname $kmod)" >> %{buildroot}/%{_sysconfdir}/depmod.d/%{pkg}.conf + echo "override $(basename $kmod .ko) * extra/$(dirname $kmod)" >> %{buildroot}/%{_sysconfdir}/depmod.d/%{pkg}.conf +done + +%clean +%{__rm} -rf %{buildroot} + +%post +depmod -a > /dev/null 2>&1 + +if [ -x "/usr/sbin/weak-modules" ]; then + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdgpu.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdkcl.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdxcp.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_buddy.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_ttm_helper.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amd-sched.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules + printf '%s\n' "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdttm.ko" | /usr/sbin/weak-modules --no-initramfs --add-modules +fi + +%preun +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdgpu.ko" >> /var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdkcl.ko" >> 
/var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdxcp.ko" >> /var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_buddy.ko" >> /var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_ttm_helper.ko" >> /var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amd-sched.ko" >> /var/run/rpm-%{pkg}-modules.list +echo "/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdttm.ko" >> /var/run/rpm-%{pkg}-modules.list + +%postun +depmod -a > /dev/null 2>&1 + +if [ -x "/usr/sbin/weak-modules" ]; then + modules=( $(cat /var/run/rpm-%{pkg}-modules.list) ) + printf '%s\n' "${modules[@]}" | /usr/sbin/weak-modules --no-initramfs --remove-modules +fi +rm /var/run/rpm-%{pkg}-modules.list + +%files +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdgpu.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdkcl.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdxcp.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_buddy.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amddrm_ttm_helper.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amd-sched.ko +/lib/modules/%{kernel}.%{_arch}/extra/drivers/gpu/drm/amdgpu/amdttm.ko +%defattr(644,root,root,755) +%license licenses +%config(noreplace) %{_sysconfdir}/depmod.d/%{pkg}.conf + +%changelog +* Thu Jul 18 2024 Bob Zhou - 6.8.7-1798298 +- diff --git a/drivers/gpu/drm/amd/dkms/oot/pre-build.sh b/drivers/gpu/drm/amd/dkms/oot/pre-build.sh new file mode 100644 index 0000000000000..7cb58df401aa8 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/oot/pre-build.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +KCL="amd/amdkcl" +INC="include" +SRC="amd/dkms" + +KERNELVER=$1 +DKMS_TREE=$2 +MODULE=$3 +MODULE_VERSION=$4 +MODULE_BUILD_DIR=$5 +KERNELVER_BASE=${KERNELVER%%-*} + +version_lt () { + newest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | tail -n1) + [ "$KERNELVER_BASE" != "$newest" ] +} + +version_ge () { + newest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | tail -n1) + [ "$KERNELVER_BASE" = "$newest" ] +} + +version_gt () { + oldest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | head -n1) + [ "$KERNELVER_BASE" != "$oldest" ] +} + +version_le () { + oldest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | head -n1) + [ "$KERNELVER_BASE" = "$oldest" ] +} + +source $KCL/files + +sed -i -e '/DEFINE_WD_CLASS(reservation_ww_class)/,/EXPORT_SYMBOL(reservation_ww_class)/d' \ + -e '/dma_resv_lockdep/,/subsys_initcall/d' \ + -e '1i\#ifdef HAVE_DMA_RESV_FENCES' \ + -e '$a\#endif' $KCL/dma-buf/dma-resv.c +sed -i -e '/extern struct ww_class reservation_ww_class/i #include ' \ + -e '/struct dma_resv {/, /}/d' \ + -e '/struct dma_resv_iter {/, /}/d' \ + -e '/enum dma_resv_usage {/, /}/d' $INC/linux/dma-resv.h + +# add amd prefix to exported symbols +for file in $FILES; do + awk -F'[()]' '/EXPORT_SYMBOL/ { + print "#define "$2" amd"$2" //"$0 + }' $file | sort -u >>$INC/rename_symbol.h +done + +# rename CONFIG_xxx to CONFIG_xxx_AMDKCL +# otherwise kernel config would override dkms package config +AMDGPU_CONFIG=$(find -name Kconfig -exec grep -h '^config' {} + | sed 's/ /_/' | tr 'a-z' 'A-Z') +TTM_CONFIG=$(awk '/CONFIG_DRM/{gsub(".*\\(CONFIG_DRM","CONFIG_DRM");gsub("\\).*","");print $0}' ttm/Makefile) +SCHED_CONFIG=$(awk 
'/CONFIG_DRM/{gsub(".*\\(CONFIG_DRM","CONFIG_DRM");gsub("\\).*","");print $0}' scheduler/Makefile) +for config in $AMDGPU_CONFIG $TTM_CONFIG $SCHED_CONFIG; do + for file in $(grep -rl $config ./); do + sed -i "s/\<$config\>/&_AMDKCL/" $file + done + sed -i "/${config}$/s/$/_AMDKCL/" amd/dkms/Makefile +done + +export KERNELVER +ln -s $DKMS_TREE $MODULE_BUILD_DIR + +# Enable gcc-toolset for kernels that are built with non-default compiler +# perform this check only when permissions allow +if [[ -d /opt/rh && `id -u` -eq 0 ]]; then + for f in $(find /opt/rh -type f -a -name gcc); do + [[ -f /boot/config-$KERNELVER ]] || continue + config_gcc_version=$(. /boot/config-$KERNELVER && echo $CONFIG_GCC_VERSION) + IFS='.' read -ra ver <<<$($f -dumpfullversion) + gcc_version=$(printf "%d%02d%02d\n" ${ver[@]}) + if [[ "$config_gcc_version" = "$gcc_version" ]]; then + . ${f%/*}/../../../enable + break + fi + done +fi +echo "PATH=$PATH" >$MODULE_BUILD_DIR/.env + +(cd $SRC && ./configure) + +# rename CFLAGS_target.o / CFLAGS_REMOVE_ to CFLAGS_target.o +# for kernel version < 5.3 +if ! grep -q 'define HAVE_AMDKCL_FLAGS_TAKE_PATH' $SRC/config/config.h; then + for file in $(grep -rl 'CFLAGS_' amd/display/); do + sed -i 's|\(CFLAGS_[A-Z_]*\)$(AMDDALPATH)/.*/\(.*\.o\)|\1\2|' $file + done +fi diff --git a/drivers/gpu/drm/amd/dkms/post-build.sh b/drivers/gpu/drm/amd/dkms/post-build.sh new file mode 100755 index 0000000000000..0c600db277937 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/post-build.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +MODULE_BUILD_DIR=$1 +rm -rf $MODULE_BUILD_DIR diff --git a/drivers/gpu/drm/amd/dkms/pre-build.sh b/drivers/gpu/drm/amd/dkms/pre-build.sh new file mode 100755 index 0000000000000..ec0c41cd4411e --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/pre-build.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +KCL="amd/amdkcl" +INC="include" +SRC="amd/dkms" + +KERNELVER=$1 +DKMS_TREE=$2 +MODULE=$3 +MODULE_VERSION=$4 +MODULE_BUILD_DIR=$5 +KERNELVER_BASE=${KERNELVER%%-*} + +version_lt () { + newest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | tail -n1) + [ "$KERNELVER_BASE" != "$newest" ] +} + +version_ge () { + newest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | tail -n1) + [ "$KERNELVER_BASE" = "$newest" ] +} + +version_gt () { + oldest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | head -n1) + [ "$KERNELVER_BASE" != "$oldest" ] +} + +version_le () { + oldest=$((echo "$KERNELVER_BASE"; echo "$1") | sort -V | head -n1) + [ "$KERNELVER_BASE" = "$oldest" ] +} + +source $KCL/files + +sed -i -e '/DEFINE_WD_CLASS(reservation_ww_class)/,/EXPORT_SYMBOL(reservation_ww_class)/d' \ + -e '/dma_resv_lockdep/,/subsys_initcall/d' \ + -e '1i\#ifdef HAVE_DMA_RESV_FENCES' \ + -e '$a\#endif' $KCL/dma-buf/dma-resv.c +sed -i -e '/extern struct ww_class reservation_ww_class/i #include ' \ + -e '/struct dma_resv {/, /}/d' \ + -e '/struct dma_resv_iter {/, /}/d' \ + -e '/enum dma_resv_usage {/, /}/d' $INC/linux/dma-resv.h + +# add amd prefix to exported symbols +for file in $FILES; do + awk -F'[()]' '/EXPORT_SYMBOL/ { + print "#define "$2" amd"$2" //"$0 + }' $file | sort -u >>$INC/rename_symbol.h +done + +# rename CONFIG_xxx to CONFIG_xxx_AMDKCL +# otherwise kernel config would override dkms package config +AMDGPU_CONFIG=$(find -name Kconfig -exec grep -h '^config' {} + | sed 's/ /_/' | tr 'a-z' 'A-Z') +TTM_CONFIG=$(awk '/CONFIG_DRM/{gsub(".*\\(CONFIG_DRM","CONFIG_DRM");gsub("\\).*","");print $0}' ttm/Makefile) +SCHED_CONFIG=$(awk '/CONFIG_DRM/{gsub(".*\\(CONFIG_DRM","CONFIG_DRM");gsub("\\).*","");print $0}' 
scheduler/Makefile) +for config in $AMDGPU_CONFIG $TTM_CONFIG $SCHED_CONFIG; do + for file in $(grep -rl $config ./); do + sed -i "s/\<$config\>/&_AMDKCL/" $file + done + sed -i "/${config}$/s/$/_AMDKCL/" amd/dkms/Makefile +done + +export KERNELVER +ln -s $DKMS_TREE/$MODULE/$MODULE_VERSION/build $MODULE_BUILD_DIR + +# Enable gcc-toolset for kernels that are built with non-default compiler +if [[ -d /opt/rh ]]; then + for f in $(find /opt/rh -type f -a -name gcc); do + [[ -f /boot/config-$KERNELVER ]] || continue + config_gcc_version=$(. /boot/config-$KERNELVER && echo $CONFIG_GCC_VERSION) + IFS='.' read -ra ver <<<$($f -dumpfullversion) + gcc_version=$(printf "%d%02d%02d\n" ${ver[@]}) + if [[ "$config_gcc_version" = "$gcc_version" ]]; then + . ${f%/*}/../../../enable + break + fi + done +fi +echo "PATH=$PATH" >$MODULE_BUILD_DIR/.env + +(cd $SRC && ./configure) + +# rename CFLAGS_target.o / CFLAGS_REMOVE_ to CFLAGS_target.o +# for kernel version < 5.3 +if ! grep -q 'define HAVE_AMDKCL_FLAGS_TAKE_PATH' $SRC/config/config.h; then + for file in $(grep -rl 'CFLAGS_' amd/display/); do + sed -i 's|\(CFLAGS_[A-Z_]*\)$(AMDDALPATH)/.*/\(.*\.o\)|\1\2|' $file + done +fi diff --git a/drivers/gpu/drm/amd/dkms/sources b/drivers/gpu/drm/amd/dkms/sources new file mode 100644 index 0000000000000..373b38d0d2325 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/sources @@ -0,0 +1,35 @@ +# +# The 'sources' file contains source/destination directives to be used +# by the build framework to construct the DKMS source tree +# +# File format: +# source destination +# ------ ----------- +# directory[/file] name directory/[file] name +# must exist at the directory: must have explicit directory name +# source path e.g. dir/ (with '/'). The name without +# slash is treated as a file if it does +# not exist +# the directory will be created if it +# does not exist +# file: optional file name at the destination +# +drivers/gpu/drm/amd . +drivers/gpu/drm/ttm . +include/drm/ttm include/drm/ +include/uapi/drm/amdgpu_drm.h include/uapi/drm/ +include/kcl include/ +drivers/gpu/drm/scheduler . +include/drm/gpu_scheduler.h include/drm/ +include/drm/amd_asic_type.h include/drm/ +include/drm/spsc_queue.h include/drm/ +include/uapi/linux/kfd_ioctl.h include/uapi/linux/ +include/drm/amd_rdma.h include/drm/ +drivers/dma-buf/dma-resv.c amd/amdkcl/dma-buf/ +include/linux/dma-resv.h include/linux/ +include/kcl/reservation.h include/linux/ +include/uapi/linux/kfd_sysfs.h include/uapi/linux/ +drivers/gpu/drm/drm_gem_ttm_helper.c . +include/drm/drm_gem_ttm_helper.h include/drm/ +drivers/gpu/drm/drm_buddy.c . 
+include/drm/drm_buddy.h include/drm/ diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drmP.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drmP.h new file mode 100644 index 0000000000000..c616e0e2c1798 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drmP.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRMP_H_H_ +#define _KCL_HEADER_DRMP_H_H_ + +#ifdef HAVE_DRM_DRMP_H +struct vm_area_struct; +#include_next +#endif + +#endif diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_aperture.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_aperture.h new file mode 100644 index 0000000000000..9197d9538fc69 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_aperture.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_APERTURE_H_H_ +#define _KCL_HEADER_DRM_APERTURE_H_H_ + +#if defined(HAVE_DRM_DRM_APERTURE_H) +#include_next +#endif + +#endif diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_helper.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_helper.h new file mode 100644 index 0000000000000..820228761e24b --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_helper.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DP_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DP_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DP_HELPER_H) +#include +#elif defined(HAVE_DRM_DP_DRM_DP_HELPER_H) +#include +#else +#include_next +#endif + +#endif + diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_mst_helper.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_mst_helper.h new file mode 100644 index 0000000000000..8a1cf0f4f9e33 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dp_mst_helper.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DP_MST_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DP_MST_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H) +#include +#elif defined(HAVE_DRM_DP_DRM_DP_MST_HELPER_H) +#include +#else +#include_next +#endif + +#endif + diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dsc.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dsc.h new file mode 100644 index 0000000000000..dfc77f48cef83 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_dsc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DSC_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DSC_H_H_ + + +#if defined(HAVE_DRM_DISPLAY_DRM_DSC_HELPER_H) +#include +#endif + +#if defined(HAVE_DRM_DISPLAY_DRM_DSC_H) +#include +#else +#include_next +#endif + +#endif + diff --git a/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_hdcp.h b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_hdcp.h new file mode 100644 index 0000000000000..309ffe3820d70 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/tiny_wrapper/include/drm/drm_hdcp.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_DISPLAY_HDCP_H_INCLUDED_H_ +#define _KCL_HEADER_DRM_DISPLAY_HDCP_H_INCLUDED_H_ + +#ifdef HAVE_DRM_DISPLAY_DRM_HDCP_H +#include +#include +#else +#include_next +#endif + +#endif diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index f5b725f10a7ce..f98b3a5444779 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -28,6 +28,8 @@ 
#define AMD_MAX_USEC_TIMEOUT 1000000 /* 1000 ms */ +struct amdgpu_ip_block; + /* * Chip flags @@ -61,7 +63,7 @@ enum amd_apu_flags { * acquires the list of IP blocks for the GPU in use on initialization. * It can then operate on this list to perform standard driver operations * such as: init, fini, suspend, resume, etc. -* +* * * IP block implementations are named using the following convention: * _v (E.g.: gfx_v6_0). @@ -85,7 +87,7 @@ enum amd_apu_flags { * @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler * @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine * @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine -* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia +* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia * @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor * @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types */ @@ -251,19 +253,92 @@ enum DC_FEATURE_MASK { DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 }; +/** + * enum DC_DEBUG_MASK - Bits that are useful for debugging the Display Core IP + */ enum DC_DEBUG_MASK { + /** + * @DC_DISABLE_PIPE_SPLIT: If set, disable pipe-splitting + */ DC_DISABLE_PIPE_SPLIT = 0x1, + + /** + * @DC_DISABLE_STUTTER: If set, disable memory stutter mode + */ DC_DISABLE_STUTTER = 0x2, + + /** + * @DC_DISABLE_DSC: If set, disable display stream compression + */ DC_DISABLE_DSC = 0x4, + + /** + * @DC_DISABLE_CLOCK_GATING: If set, disable clock gating optimizations + */ DC_DISABLE_CLOCK_GATING = 0x8, + + /** + * @DC_DISABLE_PSR: If set, disable Panel self refresh v1 and PSR-SU + */ DC_DISABLE_PSR = 0x10, + + /** + * @DC_FORCE_SUBVP_MCLK_SWITCH: If set, force mclk switch in subvp, even + * if mclk switch in vblank is possible + */ DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, + + /** + * @DC_DISABLE_MPO: If set, disable multi-plane offloading + */ DC_DISABLE_MPO = 0x40, + + /** + * @DC_ENABLE_DPIA_TRACE: If set, enable trace logging for DPIA + */ DC_ENABLE_DPIA_TRACE = 0x80, + + /** + * @DC_ENABLE_DML2: If set, force usage of DML2, even if the DCN version + * does not default to it. + */ DC_ENABLE_DML2 = 0x100, + + /** + * @DC_DISABLE_PSR_SU: If set, disable PSR SU + */ DC_DISABLE_PSR_SU = 0x200, + + /** + * @DC_DISABLE_REPLAY: If set, disable Panel Replay + */ DC_DISABLE_REPLAY = 0x400, + + /** + * @DC_DISABLE_IPS: If set, disable all Idle Power States, all the time. + * If more than one IPS debug bit is set, the lowest bit takes + * precedence. For example, if DC_FORCE_IPS_ENABLE and + * DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes + * precedence. + */ DC_DISABLE_IPS = 0x800, + + /** + * @DC_DISABLE_IPS_DYNAMIC: If set, disable all IPS, all the time, + * *except* when driver goes into suspend. + */ + DC_DISABLE_IPS_DYNAMIC = 0x1000, + + /** + * @DC_DISABLE_IPS2_DYNAMIC: If set, disable IPS2 (IPS1 allowed) if + * there is an enabled display. Otherwise, enable all IPS. + */ + DC_DISABLE_IPS2_DYNAMIC = 0x2000, + + /** + * @DC_FORCE_IPS_ENABLE: If set, force enable all IPS, all the time. 
+ */ + DC_FORCE_IPS_ENABLE = 0x4000, }; enum amd_dpm_forced_level; @@ -304,30 +379,30 @@ enum amd_dpm_forced_level; */ struct amd_ip_funcs { char *name; - int (*early_init)(void *handle); - int (*late_init)(void *handle); - int (*sw_init)(void *handle); - int (*sw_fini)(void *handle); - int (*early_fini)(void *handle); - int (*hw_init)(void *handle); - int (*hw_fini)(void *handle); - void (*late_fini)(void *handle); - int (*prepare_suspend)(void *handle); - int (*suspend)(void *handle); - int (*resume)(void *handle); + int (*early_init)(struct amdgpu_ip_block *ip_block); + int (*late_init)(struct amdgpu_ip_block *ip_block); + int (*sw_init)(struct amdgpu_ip_block *ip_block); + int (*sw_fini)(struct amdgpu_ip_block *ip_block); + int (*early_fini)(struct amdgpu_ip_block *ip_block); + int (*hw_init)(struct amdgpu_ip_block *ip_block); + int (*hw_fini)(struct amdgpu_ip_block *ip_block); + void (*late_fini)(struct amdgpu_ip_block *ip_block); + int (*prepare_suspend)(struct amdgpu_ip_block *ip_block); + int (*suspend)(struct amdgpu_ip_block *ip_block); + int (*resume)(struct amdgpu_ip_block *ip_block); bool (*is_idle)(void *handle); - int (*wait_for_idle)(void *handle); - bool (*check_soft_reset)(void *handle); - int (*pre_soft_reset)(void *handle); - int (*soft_reset)(void *handle); - int (*post_soft_reset)(void *handle); + int (*wait_for_idle)(struct amdgpu_ip_block *ip_block); + bool (*check_soft_reset)(struct amdgpu_ip_block *ip_block); + int (*pre_soft_reset)(struct amdgpu_ip_block *ip_block); + int (*soft_reset)(struct amdgpu_ip_block *ip_block); + int (*post_soft_reset)(struct amdgpu_ip_block *ip_block); int (*set_clockgating_state)(void *handle, enum amd_clockgating_state state); int (*set_powergating_state)(void *handle, enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); - void (*dump_ip_state)(void *handle); - void (*print_ip_state)(void *handle, struct drm_printer *p); + void (*dump_ip_state)(struct amdgpu_ip_block *ip_block); + void (*print_ip_state)(struct amdgpu_ip_block *ip_block, struct drm_printer *p); }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h index f42a276499cd1..5d9d5fea6e06b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h @@ -6199,10 +6199,12 @@ #define DCHUBBUB_CTRL_STATUS__ROB_UNDERFLOW_STATUS__SHIFT 0x1 #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_STATUS__SHIFT 0x2 #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_CLEAR__SHIFT 0x3 +#define DCHUBBUB_CTRL_STATUS__DCHUBBUB_HW_DEBUG__SHIFT 0x4 #define DCHUBBUB_CTRL_STATUS__CSTATE_SWATH_CHK_GOOD_MODE__SHIFT 0x1f #define DCHUBBUB_CTRL_STATUS__ROB_UNDERFLOW_STATUS_MASK 0x00000002L #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_STATUS_MASK 0x00000004L #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_CLEAR_MASK 0x00000008L +#define DCHUBBUB_CTRL_STATUS__DCHUBBUB_HW_DEBUG_MASK 0x3FFFFFF0L #define DCHUBBUB_CTRL_STATUS__CSTATE_SWATH_CHK_GOOD_MODE_MASK 0x80000000L //DCHUBBUB_TIMEOUT_DETECTION_CTRL1 #define DCHUBBUB_TIMEOUT_DETECTION_CTRL1__DCHUBBUB_TIMEOUT_ERROR_STATUS__SHIFT 0x0 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_default.h index 320e1ee5df1a9..2050888f7ec6d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_default.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_default.h @@ -2616,6 +2616,13 @@ #define 
mmSPI_WCL_PIPE_PERCENT_CS5_DEFAULT 0x0000007f #define mmSPI_WCL_PIPE_PERCENT_CS6_DEFAULT 0x0000007f #define mmSPI_WCL_PIPE_PERCENT_CS7_DEFAULT 0x0000007f +#define mmSPI_GDBG_WAVE_CNTL_DEFAULT 0x00000000 +#define mmSPI_GDBG_TRAP_CONFIG_DEFAULT 0x00000000 +#define mmSPI_GDBG_TRAP_MASK_DEFAULT 0x00000000 +#define mmSPI_GDBG_WAVE_CNTL2_DEFAULT 0x00000000 +#define mmSPI_GDBG_WAVE_CNTL3_DEFAULT 0x00000000 +#define mmSPI_GDBG_TRAP_DATA0_DEFAULT 0x00000000 +#define mmSPI_GDBG_TRAP_DATA1_DEFAULT 0x00000000 #define mmSPI_COMPUTE_QUEUE_RESET_DEFAULT 0x00000000 #define mmSPI_RESOURCE_RESERVE_CU_0_DEFAULT 0x00000000 #define mmSPI_RESOURCE_RESERVE_CU_1_DEFAULT 0x00000000 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index 12d451e5475b7..5b17d90664524 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -462,6 +462,8 @@ #define mmSQ_IND_DATA_BASE_IDX 0 #define mmSQ_CMD 0x037b #define mmSQ_CMD_BASE_IDX 0 +#define mmSQ_HOSTTRAP_STATUS 0x0376 +#define mmSQ_HOSTTRAP_STATUS_BASE_IDX 0 #define mmSQ_TIME_HI 0x037c #define mmSQ_TIME_HI_BASE_IDX 0 #define mmSQ_TIME_LO 0x037d diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index 2dfa0e5b1aa3e..3e0210c2bf369 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -2616,6 +2616,11 @@ //SQ_CMD_TIMESTAMP #define SQ_CMD_TIMESTAMP__TIMESTAMP__SHIFT 0x0 #define SQ_CMD_TIMESTAMP__TIMESTAMP_MASK 0x000000FFL +//SQ_HOSTTRAP_STATUS +#define SQ_HOSTTRAP_STATUS__HTPENDINGCOUNT__SHIFT 0x0 +#define SQ_HOSTTRAP_STATUS__HTPENDING_OVERRIDE__SHIFT 0x8 +#define SQ_HOSTTRAP_STATUS__HTPENDINGCOUNT_MASK 0x000000FFL +#define SQ_HOSTTRAP_STATUS__HTPENDING_OVERRIDE_MASK 0x00000100L //SQ_IND_INDEX #define SQ_IND_INDEX__WAVE_ID__SHIFT 0x0 #define SQ_IND_INDEX__SIMD_ID__SHIFT 0x4 diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 09cbc3afd6d89..b0fc22383e287 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4 uint16_t supporteddevices; uint8_t number_of_path; uint8_t reserved; - struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path + struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path }; struct display_object_info_table_v1_5 { @@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 { uint8_t reserved; // the real number of this included in the structure is calculated by using the // (whole structure size - the header size- number_of_path)/size of atom_display_object_path - struct atom_display_object_path_v3 display_path[8]; + struct atom_display_object_path_v3 display_path[]; }; /* diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c40..710e328fad48f 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; 
+struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 7744ca3ef4b19..f221e5bee50fc 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -31,6 +31,8 @@ #include #include #include +#include + #include "amdgpu_irq.h" #include "amdgpu_gfx.h" @@ -71,6 +73,11 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +struct kfd_cu_occupancy { + u32 wave_cnt; + u32 doorbell_off; +}; + /** * enum kfd_sched_policy * @@ -199,6 +206,11 @@ struct tile_config { * IH ring entry. This function allows the KFD ISR to get the VMID * from the fault status register as early as possible. * + * @get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * + * @build_grace_period_packet_info: build a IQ_WAUT_TIME2 reg value with an + * updated grace period value. + * * @get_cu_occupancy: Function pointer that returns to caller the number * of wave fronts that are in flight for all of the queues of a process * as identified by its pasid. 
It is important to note that the value @@ -313,8 +325,9 @@ struct kfd2kgd_calls { uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); - void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, - int *wave_cnt, int *max_waves_per_cu, uint32_t inst); + void (*get_cu_occupancy)(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); @@ -324,6 +337,15 @@ struct kfd2kgd_calls { uint64_t (*hqd_reset)(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t inst, unsigned int utimeout); + uint32_t (*trigger_pc_sample_trap)(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t *target_simd, + uint32_t *target_wave_slot, + enum kfd_ioctl_pc_sample_method method, + uint32_t inst); + void (*override_core_cg)(struct amdgpu_device *adev, + uint32_t value, + uint32_t inst); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 19a48d98830a3..0cec1c4d4e266 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -291,6 +291,15 @@ enum pp_policy_soc_pstate { #define PP_POLICY_MAX_LEVELS 5 +enum pp_pm_phase_det_param_id { + PP_PM_PHASE_DET_LO_FREQ = 0, + PP_PM_PHASE_DET_HI_FREQ = 1, + PP_PM_PHASE_DET_THRESH = 2, + PP_PM_PHASE_DET_ALPHA = 3, + PP_PM_PHASE_DET_HYST = 4, + PP_PM_PHASE_DET_ALL = 5, +}; + #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 @@ -336,7 +345,8 @@ enum pp_policy_soc_pstate { #define MAX_CLKS 4 #define NUM_VCN 4 #define NUM_JPEG_ENG 32 - +#define MAX_XCC 8 +#define NUM_XCP 8 struct seq_file; enum amd_pp_clock_type; struct amd_pp_simple_clock_info; @@ -350,6 +360,15 @@ struct pp_smu_wm_range_sets; struct pp_smu_nv_clock_table; struct dpm_clocks; +struct amdgpu_xcp_metrics { + /* Utilization Instantaneous (%) */ + u32 gfx_busy_inst[MAX_XCC]; + u16 jpeg_busy[NUM_JPEG_ENG]; + u16 vcn_busy[NUM_VCN]; + /* Utilization Accumulated (%) */ + u64 gfx_busy_acc[MAX_XCC]; +}; + struct amd_pm_funcs { /* export for dpm on ci and si */ int (*pre_set_power_state)(void *handle); @@ -872,6 +891,97 @@ struct gpu_metrics_v1_5 { uint16_t padding; }; +struct gpu_metrics_v1_6 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Accumulation cycle counter */ + uint32_t accumulation_counter; + + /* Accumulated throttler residencies */ + uint32_t prochot_residency_acc; + uint32_t ppt_residency_acc; + uint32_t socket_thm_residency_acc; + uint32_t vr_thm_residency_acc; + uint32_t hbm_thm_residency_acc; + + /* Clock Lock Status. 
Each bit corresponds to a clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /* PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /* PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size (KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (MHz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + /* Number of current partitions */ + uint16_t num_partition; + + /* XCP metrics stats */ + struct amdgpu_xcp_metrics xcp_stats[NUM_XCP]; + + /* PCIE other end recovery counter */ + uint32_t pcie_lc_perf_other_end_recovery; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 9dc82f4d7c937..10fd51553f68a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1884,3 +1884,11 @@ int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev, return ret; } + +void amdgpu_dpm_phase_det_debugfs_init(struct amdgpu_device *adev) +{ + if (!is_support_sw_smu(adev)) + return; + + amdgpu_smu_phase_det_debugfs_init(adev); +} diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index d5d6ab484e5ad..8263e62023936 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4531,6 +4531,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) dev_info(adev->dev, "overdrive feature is not supported\n"); } +#ifdef HAVE_PCI_DRIVER_DEV_GROUPS if (amdgpu_dpm_get_pm_policy_info(adev, PP_PM_POLICY_NONE, NULL) != -EOPNOTSUPP) { ret = devm_device_add_group(adev->dev, @@ -4538,6 +4539,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if (ret) goto err_out0; } +#endif adev->pm.sysfs_initialized = true; @@ -4842,5 +4844,6 @@ void amdgpu_debugfs_pm_init(struct amdgpu_device *adev) adev->pm.smu_prv_buffer_size); amdgpu_dpm_stb_debug_fs_init(adev); + amdgpu_dpm_phase_det_debugfs_init(adev); #endif } diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f5bf41f21c412..618d56d9e005e 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -597,5 +597,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type, int policy_level); ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum pp_pm_policy p_type, char *buf); +void
amdgpu_dpm_phase_det_debugfs_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index e8b6989a40f35..2cd6cb991f29c 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -2954,9 +2954,9 @@ static int kv_dpm_get_temp(void *handle) return actual_temp; } -static int kv_dpm_early_init(void *handle) +static int kv_dpm_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->powerplay.pp_funcs = &kv_dpm_funcs; adev->powerplay.pp_handle = adev; @@ -2965,10 +2965,10 @@ static int kv_dpm_early_init(void *handle) return 0; } -static int kv_dpm_late_init(void *handle) +static int kv_dpm_late_init(struct amdgpu_ip_block *ip_block) { /* powerdown unused blocks for now */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->pm.dpm_enabled) return 0; @@ -2979,11 +2979,10 @@ static int kv_dpm_late_init(void *handle) return 0; } -static int kv_dpm_sw_init(void *handle) +static int kv_dpm_sw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - + struct amdgpu_device *adev = ip_block->adev; ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) @@ -3024,9 +3023,9 @@ static int kv_dpm_sw_init(void *handle) return ret; } -static int kv_dpm_sw_fini(void *handle) +static int kv_dpm_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; flush_work(&adev->pm.dpm.thermal.work); @@ -3035,10 +3034,10 @@ static int kv_dpm_sw_fini(void *handle) return 0; } -static int kv_dpm_hw_init(void *handle) +static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!amdgpu_dpm) return 0; @@ -3053,9 +3052,9 @@ static int kv_dpm_hw_init(void *handle) return ret; } -static int kv_dpm_hw_fini(void *handle) +static int kv_dpm_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) kv_dpm_disable(adev); @@ -3063,9 +3062,9 @@ static int kv_dpm_hw_fini(void *handle) return 0; } -static int kv_dpm_suspend(void *handle) +static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) { /* disable dpm */ @@ -3076,10 +3075,10 @@ static int kv_dpm_suspend(void *handle) return 0; } -static int kv_dpm_resume(void *handle) +static int kv_dpm_resume(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ @@ -3100,13 +3099,13 @@ static bool kv_dpm_is_idle(void *handle) return true; } -static int kv_dpm_wait_for_idle(void *handle) +static int kv_dpm_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int kv_dpm_soft_reset(void *handle) +static int kv_dpm_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c 
b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index a1baa13ab2c26..a4908f0402f1a 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -4755,13 +4755,15 @@ static int si_populate_memory_timing_parameters(struct amdgpu_device *adev, u32 dram_timing; u32 dram_timing2; u32 burst_time; + int ret; arb_regs->mc_arb_rfsh_rate = (u8)si_calculate_memory_refresh_rate(adev, pl->sclk); - amdgpu_atombios_set_engine_dram_timings(adev, - pl->sclk, - pl->mclk); + ret = amdgpu_atombios_set_engine_dram_timings(adev, pl->sclk, + pl->mclk); + if (ret) + return ret; dram_timing = RREG32(MC_ARB_DRAM_TIMING); dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2); @@ -7619,10 +7621,10 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, return 0; } -static int si_dpm_late_init(void *handle) +static int si_dpm_late_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!adev->pm.dpm_enabled) return 0; @@ -7716,10 +7718,10 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) return err; } -static int si_dpm_sw_init(void *handle) +static int si_dpm_sw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) @@ -7763,9 +7765,9 @@ static int si_dpm_sw_init(void *handle) return ret; } -static int si_dpm_sw_fini(void *handle) +static int si_dpm_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; flush_work(&adev->pm.dpm.thermal.work); @@ -7774,11 +7776,11 @@ static int si_dpm_sw_fini(void *handle) return 0; } -static int si_dpm_hw_init(void *handle) +static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!amdgpu_dpm) return 0; @@ -7793,9 +7795,9 @@ static int si_dpm_hw_init(void *handle) return ret; } -static int si_dpm_hw_fini(void *handle) +static int si_dpm_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) si_dpm_disable(adev); @@ -7803,9 +7805,9 @@ static int si_dpm_hw_fini(void *handle) return 0; } -static int si_dpm_suspend(void *handle) +static int si_dpm_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) { /* disable dpm */ @@ -7816,10 +7818,10 @@ static int si_dpm_suspend(void *handle) return 0; } -static int si_dpm_resume(void *handle) +static int si_dpm_resume(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ @@ -7841,13 +7843,13 @@ static bool si_dpm_is_idle(void *handle) return true; } -static int si_dpm_wait_for_idle(void *handle) +static int si_dpm_wait_for_idle(struct amdgpu_ip_block *ip_block) { /* XXX */ return 0; } -static int si_dpm_soft_reset(void *handle) +static int si_dpm_soft_reset(struct amdgpu_ip_block *ip_block) { return 0; } @@ -7928,10 +7930,10 @@ static void 
si_dpm_print_power_state(void *handle, amdgpu_dpm_print_ps_status(adev, rps); } -static int si_dpm_early_init(void *handle) +static int si_dpm_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->powerplay.pp_funcs = &si_dpm_funcs; adev->powerplay.pp_handle = adev; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a71c6117d7e54..f193c77cc1413 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -75,11 +75,10 @@ static void amd_powerplay_destroy(struct amdgpu_device *adev) hwmgr = NULL; } -static int pp_early_init(void *handle) +static int pp_early_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = handle; - + struct amdgpu_device *adev = ip_block->adev; ret = amd_powerplay_create(adev); if (ret != 0) @@ -131,9 +130,9 @@ static void pp_swctf_delayed_work_handler(struct work_struct *work) orderly_poweroff(true); } -static int pp_sw_init(void *handle) +static int pp_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; int ret = 0; @@ -148,9 +147,9 @@ static int pp_sw_init(void *handle) return ret; } -static int pp_sw_fini(void *handle) +static int pp_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; hwmgr_sw_fini(hwmgr); @@ -160,10 +159,10 @@ static int pp_sw_fini(void *handle) return 0; } -static int pp_hw_init(void *handle) +static int pp_hw_init(struct amdgpu_ip_block *ip_block) { int ret = 0; - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; ret = hwmgr_hw_init(hwmgr); @@ -174,10 +173,9 @@ static int pp_hw_init(void *handle) return ret; } -static int pp_hw_fini(void *handle) +static int pp_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + struct pp_hwmgr *hwmgr = ip_block->adev->powerplay.pp_handle; cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); @@ -217,9 +215,9 @@ static void pp_reserve_vram_for_smu(struct amdgpu_device *adev) } } -static int pp_late_init(void *handle) +static int pp_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; if (hwmgr && hwmgr->pm_en) @@ -231,9 +229,9 @@ static int pp_late_init(void *handle) return 0; } -static void pp_late_fini(void *handle) +static void pp_late_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; if (adev->pm.smu_prv_buffer) amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL); @@ -246,12 +244,12 @@ static bool pp_is_idle(void *handle) return false; } -static int pp_wait_for_idle(void *handle) +static int pp_wait_for_idle(struct amdgpu_ip_block *ip_block) { return 0; } -static int pp_sw_reset(void *handle) +static int pp_sw_reset(struct amdgpu_ip_block *ip_block) { return 0; } @@ -262,9 +260,9 @@ static int pp_set_powergating_state(void *handle, return 0; } -static int pp_suspend(void *handle) +static int pp_suspend(struct 
amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; cancel_delayed_work_sync(&hwmgr->swctf_delayed_work); @@ -272,10 +270,9 @@ static int pp_suspend(void *handle) return hwmgr_suspend(hwmgr); } -static int pp_resume(void *handle) +static int pp_resume(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + struct pp_hwmgr *hwmgr = ip_block->adev->powerplay.pp_handle; return hwmgr_resume(hwmgr); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index ca1c7ae8d146d..f06b29e33ba45 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -1183,6 +1183,8 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); + PP_ASSERT_WITH_CODE(fw_info != NULL, + "Missing firmware info!", return -EINVAL); if ((fw_info->ucTableFormatRevision == 1) && (le16_to_cpu(fw_info->usStructureSize) >= sizeof(ATOM_FIRMWARE_INFO_V1_4))) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 42adc2a3dcbc1..ec6cec793c25c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -192,7 +192,11 @@ struct smu10_clock_voltage_dependency_record { struct smu10_voltage_dependency_table { uint32_t count; - struct smu10_clock_voltage_dependency_record entries[] __counted_by(count); + struct smu10_clock_voltage_dependency_record entries[] +#ifdef __counted_by + __counted_by(count) +#endif + ; }; struct smu10_clock_voltage_information { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 9d7454b3c3143..a6011cdf79f0d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -549,7 +549,8 @@ bool is_support_sw_smu(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) return false; - if (amdgpu_ip_version(adev, MP1_HWIP, 0) >= IP_VERSION(11, 0, 0)) + if ((amdgpu_ip_version(adev, MP1_HWIP, 0) >= IP_VERSION(11, 0, 0)) && + amdgpu_device_ip_is_valid(adev, AMD_IP_BLOCK_TYPE_SMC)) return true; return false; @@ -741,9 +742,9 @@ static int smu_set_funcs(struct amdgpu_device *adev) return 0; } -static int smu_early_init(void *handle) +static int smu_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu; int r; @@ -825,9 +826,9 @@ static int smu_apply_default_config_table_settings(struct smu_context *smu) return smu_set_config_table(smu, &adev->pm.config_table); } -static int smu_late_init(void *handle) +static int smu_late_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; @@ -1234,9 +1235,9 @@ static void smu_init_xgmi_plpd_mode(struct smu_context *smu) } } -static int smu_sw_init(void *handle) +static int smu_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = 
ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; int ret; @@ -1257,7 +1258,6 @@ static int smu_sw_init(void *handle) atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; @@ -1265,6 +1265,7 @@ static int smu_sw_init(void *handle) smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; @@ -1313,9 +1314,9 @@ static int smu_sw_init(void *handle) return 0; } -static int smu_sw_fini(void *handle) +static int smu_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; int ret; @@ -1786,10 +1787,10 @@ static int smu_start_smc_engine(struct smu_context *smu) return ret; } -static int smu_hw_init(void *handle) +static int smu_hw_init(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) { @@ -2008,9 +2009,9 @@ static int smu_reset_mp1_state(struct smu_context *smu) return ret; } -static int smu_hw_fini(void *handle) +static int smu_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; int ret; @@ -2041,9 +2042,9 @@ static int smu_hw_fini(void *handle) return 0; } -static void smu_late_fini(void *handle) +static void smu_late_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; kfree(smu); @@ -2052,26 +2053,31 @@ static void smu_late_fini(void *handle) static int smu_reset(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; + struct amdgpu_ip_block *ip_block; int ret; - ret = smu_hw_fini(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC); + if (!ip_block) + return -EINVAL; + + ret = smu_hw_fini(ip_block); if (ret) return ret; - ret = smu_hw_init(adev); + ret = smu_hw_init(ip_block); if (ret) return ret; - ret = smu_late_init(adev); + ret = smu_late_init(ip_block); if (ret) return ret; return 0; } -static int smu_suspend(void *handle) +static int smu_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; int ret; uint64_t count; @@ -2103,10 +2109,10 @@ static int smu_suspend(void *handle) return 0; } -static int smu_resume(void *handle) +static int smu_resume(struct amdgpu_ip_block *ip_block) { int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; 
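The kv_dpm, si_dpm, powerplay and swsmu hunks above (including this smu_resume hunk) all apply one mechanical conversion: IP-block callbacks receive a typed struct amdgpu_ip_block * instead of an opaque void *handle, and adev is derived from it. A condensed, self-contained before/after sketch of the pattern, using stub types for illustration only (not the real driver headers):

#include <stdio.h>

struct amdgpu_device { int dummy; };                     /* stub, illustration only */
struct amdgpu_ip_block { struct amdgpu_device *adev; };  /* stub, illustration only */

/* old style: every hook casts an opaque handle */
static int hook_old(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	return adev ? 0 : -1;
}

/* new style: typed argument, no cast, the owning ip_block is reachable */
static int hook_new(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	return adev ? 0 : -1;
}

int main(void)
{
	struct amdgpu_device dev = { 0 };
	struct amdgpu_ip_block blk = { .adev = &dev };

	printf("%d %d\n", hook_old(&dev), hook_new(&blk));
	return 0;
}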
struct smu_context *smu = adev->powerplay.pp_handle; if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) @@ -3818,3 +3824,230 @@ int smu_send_rma_reason(struct smu_context *smu) return ret; } + +int smu_set_phase_det_param(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, uint32_t val) +{ + struct smu_dpm_context *dpm_ctxt = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = dpm_ctxt->pd_ctl; + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !pd_ctl) + return -EOPNOTSUPP; + + if (!pd_ctl->ops || !pd_ctl->ops->set) + return -EOPNOTSUPP; + + if (pd_ctl->status == SMU_PHASE_DET_DISABLED) + return -EPERM; + + return pd_ctl->ops->set(smu, id, val); +} + +int smu_get_phase_det_param(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, uint32_t *val) +{ + struct smu_dpm_context *dpm_ctxt = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = dpm_ctxt->pd_ctl; + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !pd_ctl) + return -EOPNOTSUPP; + + if (!pd_ctl->ops || !pd_ctl->ops->get) + return -EOPNOTSUPP; + + return pd_ctl->ops->get(smu, id, val); +} + +int smu_phase_det_enable(struct smu_context *smu, bool enable) +{ + struct smu_dpm_context *dpm_ctxt = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = dpm_ctxt->pd_ctl; + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !pd_ctl) + return -EOPNOTSUPP; + + if (!pd_ctl->ops || !pd_ctl->ops->enable) + return -EOPNOTSUPP; + + if (pd_ctl->status == SMU_PHASE_DET_DISABLED) + return -EPERM; + + return pd_ctl->ops->enable(smu, enable); +} + +static int smu_phase_det_get_residency(struct smu_context *smu, uint32_t *res) +{ + struct smu_dpm_context *dpm_ctxt = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = dpm_ctxt->pd_ctl; + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !pd_ctl) + return -EOPNOTSUPP; + + if (!pd_ctl->ops || !pd_ctl->ops->get_residency) + return -EOPNOTSUPP; + + if (pd_ctl->status == SMU_PHASE_DET_DISABLED) + return -EPERM; + + return pd_ctl->ops->get_residency(smu, res); +} + +#if defined(CONFIG_DEBUG_FS) + +static int smu_phase_det_debugfs_get_residency(void *data, u64 *val) +{ + struct smu_context *smu = (struct smu_context *)data; + uint32_t res; + int r; + + r = smu_phase_det_get_residency(smu, &res); + if (r) + return r; + *val = res; + + return 0; +} + +static int smu_phase_det_debugfs_status(void *data, u64 *val) +{ + struct smu_context *smu = (struct smu_context *)data; + struct smu_dpm_context *dpm_ctxt = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = dpm_ctxt->pd_ctl; + + *val = pd_ctl->status; + + return 0; +} + +static int smu_phase_det_debugfs_enable(void *data, u64 val) +{ + struct smu_context *smu = (struct smu_context *)data; + struct amdgpu_device *adev = smu->adev; + + if (amdgpu_in_reset(adev) || adev->in_suspend) + return -EPERM; + + return smu_phase_det_enable(smu, !!val); +} + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE +#define DEBUGFS_PHASE_DET_FOPS(param) \ + static int smu_phase_det_fops_##param##_get(void *data, u64 *val) \ + { \ + struct smu_context *smu = (struct smu_context *)data; \ + int r; \ + u32 v; \ + \ + r = smu_get_phase_det_param(smu, PP_PM_PHASE_DET_##param, &v); \ + *val = v; \ + return r; \ + } \ + \ + static int smu_phase_det_fops_##param##_set(void *data, u64 val) \ + { \ + struct smu_context *smu = (struct smu_context *)data; \ + struct amdgpu_device *adev = smu->adev; \ + \ + if (amdgpu_in_reset(adev) || adev->in_suspend) \ + return -EPERM; \ + \ + return smu_set_phase_det_param(smu, 
PP_PM_PHASE_DET_##param, \ + (u32)val); \ + } \ + DEFINE_DEBUGFS_ATTRIBUTE(smu_phase_det_fops_##param, \ + smu_phase_det_fops_##param##_get, \ + smu_phase_det_fops_##param##_set, "%llu\n") +#else +#define DEBUGFS_PHASE_DET_FOPS(param) \ + static int smu_phase_det_fops_##param##_get(void *data, u64 *val) \ + { \ + struct smu_context *smu = (struct smu_context *)data; \ + int r; \ + u32 v; \ + \ + r = smu_get_phase_det_param(smu, PP_PM_PHASE_DET_##param, &v); \ + *val = v; \ + return r; \ + } \ + \ + static int smu_phase_det_fops_##param##_set(void *data, u64 val) \ + { \ + struct smu_context *smu = (struct smu_context *)data; \ + struct amdgpu_device *adev = smu->adev; \ + \ + if (amdgpu_in_reset(adev) || adev->in_suspend) \ + return -EPERM; \ + \ + return smu_set_phase_det_param(smu, PP_PM_PHASE_DET_##param, \ + (u32)val); \ + } \ + DEFINE_SIMPLE_ATTRIBUTE(smu_phase_det_fops_##param, \ + smu_phase_det_fops_##param##_get, \ + smu_phase_det_fops_##param##_set, "%llu\n") +#endif + +DEBUGFS_PHASE_DET_FOPS(LO_FREQ); +DEBUGFS_PHASE_DET_FOPS(HI_FREQ); +DEBUGFS_PHASE_DET_FOPS(THRESH); +DEBUGFS_PHASE_DET_FOPS(ALPHA); +DEBUGFS_PHASE_DET_FOPS(HYST); + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE +DEFINE_DEBUGFS_ATTRIBUTE(smu_phase_det_fops_en, smu_phase_det_debugfs_status, + smu_phase_det_debugfs_enable, "%llu\n"); +#else +DEFINE_SIMPLE_ATTRIBUTE(smu_phase_det_fops_en, smu_phase_det_debugfs_status, + smu_phase_det_debugfs_enable, "%llu\n"); +#endif + +#ifdef DEFINE_DEBUGFS_ATTRIBUTE +DEFINE_DEBUGFS_ATTRIBUTE(smu_phase_det_fops_res, + smu_phase_det_debugfs_get_residency, NULL, "%llu\n"); +#else +DEFINE_SIMPLE_ATTRIBUTE(smu_phase_det_fops_res, + smu_phase_det_debugfs_get_residency, NULL, "%llu\n"); +#endif + +#define DEBUGFS_CREATE_PHASE_DET_ATTR(name, param) \ + debugfs_create_file(#name, 0644, dir, smu, &smu_phase_det_fops_##param) + +#define AMDGPU_SMU_PHASE_DET "smu_phase_detect" +#endif + +void amdgpu_smu_phase_det_debugfs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + + struct smu_context *smu = adev->powerplay.pp_handle; + struct smu_phase_det_ctl *pd_ctl; + struct dentry *dir; + + if (!smu) + return; + + pd_ctl = smu->smu_dpm.pd_ctl; + if (!pd_ctl) + return; + + dir = debugfs_create_dir(AMDGPU_SMU_PHASE_DET, + adev_to_drm(adev)->primary->debugfs_root); + + debugfs_create_file("enable", 0644, dir, smu, &smu_phase_det_fops_en); + + if (pd_ctl->ops->get_residency) + debugfs_create_file("residency", 0444, dir, smu, + &smu_phase_det_fops_res); + + DEBUGFS_CREATE_PHASE_DET_ATTR(freq_lo, LO_FREQ); + DEBUGFS_CREATE_PHASE_DET_ATTR(freq_hi, HI_FREQ); + DEBUGFS_CREATE_PHASE_DET_ATTR(threshold, THRESH); + DEBUGFS_CREATE_PHASE_DET_ATTR(alpha, ALPHA); + DEBUGFS_CREATE_PHASE_DET_ATTR(hyst, HYST); + +#endif +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b44a185d07e84..22670990e65f5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -383,6 +383,36 @@ struct smu_dpm_policy_ctxt { unsigned long policy_mask; }; +struct smu_phase_det_params { + uint32_t freq_hi; + uint32_t freq_lo; + uint32_t thresh; + uint32_t hyst; + uint32_t alpha; +}; + +struct smu_phase_det_ops { + int (*set)(struct smu_context *smu, enum pp_pm_phase_det_param_id id, + uint32_t val); + int (*get)(struct smu_context *smu, enum pp_pm_phase_det_param_id id, + uint32_t *val); + int (*enable)(struct smu_context *smu, bool enable); + int (*get_residency)(struct smu_context
*smu, uint32_t *res); +}; + +enum phase_det_state { + SMU_PHASE_DET_OFF = 0, + SMU_PHASE_DET_ON = 1, + SMU_PHASE_DET_DISABLED = -1, +}; + +struct smu_phase_det_ctl { + struct smu_phase_det_params params; + struct smu_phase_det_ops *ops; + enum phase_det_state status; + uint32_t residency; +}; + struct smu_dpm_context { uint32_t dpm_context_size; void *dpm_context; @@ -394,6 +424,7 @@ struct smu_dpm_context { struct smu_power_state *dpm_current_power_state; struct mclock_latency_table *mclk_latency_table; struct smu_dpm_policy_ctxt *dpm_policies; + struct smu_phase_det_ctl *pd_ctl; }; struct smu_power_gate { @@ -1635,5 +1666,12 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, ssize_t smu_get_pm_policy_info(struct smu_context *smu, enum pp_pm_policy p_type, char *sysbuf); +int smu_set_phase_det_param(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, uint32_t val); +int smu_get_phase_det_param(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, uint32_t *val); +int smu_phase_det_enable(struct smu_context *smu, bool enable); +void amdgpu_smu_phase_det_debugfs_init(struct amdgpu_device *adev); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h index ee457a6f08130..c2fd0a4a13e5d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h @@ -25,7 +25,7 @@ #define SMU14_DRIVER_IF_V14_0_H //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x18 +#define PPTABLE_VERSION 0x1B #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -145,7 +145,7 @@ typedef enum { } FEATURE_BTC_e; // Debug Overrides Bitmask -#define DEBUG_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000001 +#define DEBUG_OVERRIDE_NOT_USE 0x00000001 #define DEBUG_OVERRIDE_DISABLE_VOLT_LINK_DCN_FCLK 0x00000002 #define DEBUG_OVERRIDE_DISABLE_VOLT_LINK_MP0_FCLK 0x00000004 #define DEBUG_OVERRIDE_DISABLE_VOLT_LINK_VCN_DCFCLK 0x00000008 @@ -161,6 +161,7 @@ typedef enum { #define DEBUG_OVERRIDE_ENABLE_SOC_VF_BRINGUP_MODE 0x00002000 #define DEBUG_OVERRIDE_ENABLE_PER_WGP_RESIENCY 0x00004000 #define DEBUG_OVERRIDE_DISABLE_MEMORY_VOLTAGE_SCALING 0x00008000 +#define DEBUG_OVERRIDE_DFLL_BTC_FCW_LOG 0x00010000 // VR Mapping Bit Defines #define VR_MAPPING_VR_SELECT_MASK 0x01 @@ -391,6 +392,21 @@ typedef struct { EccInfo_t EccInfo[24]; } EccInfoTable_t; +#define EPCS_HIGH_POWER 600 +#define EPCS_NORMAL_POWER 450 +#define EPCS_LOW_POWER 300 +#define EPCS_SHORTED_POWER 150 +#define EPCS_NO_BOOTUP 0 + +typedef enum{ + EPCS_SHORTED_LIMIT, + EPCS_LOW_POWER_LIMIT, + EPCS_NORMAL_POWER_LIMIT, + EPCS_HIGH_POWER_LIMIT, + EPCS_NOT_CONFIGURED, + EPCS_STATUS_COUNT, +} EPCS_STATUS_e; + //D3HOT sequences typedef enum { BACO_SEQUENCE, @@ -662,7 +678,7 @@ typedef enum { } PP_GRTAVFS_FW_SEP_FUSE_e; #define PP_NUM_RTAVFS_PWL_ZONES 5 - +#define PP_NUM_PSM_DIDT_PWL_ZONES 3 // VBIOS or PPLIB configures telemetry slope and offset. 
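For reference, the phase-detect interface declared above is exposed by amdgpu_smu_phase_det_debugfs_init through files named enable, residency, freq_lo, freq_hi, threshold, alpha and hyst under a smu_phase_detect debugfs directory. A hedged user-space sketch of driving those knobs; the /sys/kernel/debug/dri/0 root is an assumption (the card index and debugfs mount point vary by system), and the parameter values are purely illustrative:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* assumed debugfs location; adjust the dri card index for your system */
#define PD_DIR "/sys/kernel/debug/dri/0/smu_phase_detect"

static int pd_write(const char *file, const char *val)
{
	char path[256];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "%s/%s", PD_DIR, file);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* illustrative values only: program the frequency band, then arm */
	if (pd_write("freq_lo", "500") || pd_write("freq_hi", "1400") ||
	    pd_write("threshold", "75") || pd_write("enable", "1"))
		perror("phase_det");
	return 0;
}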
Only slope expected to be set for SVI3 // Slope Q1.7, Offset Q1.2 @@ -746,10 +762,10 @@ typedef struct { uint16_t Padding; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz - uint16_t UclkFmin; // MHz - uint16_t UclkFmax; // MHz + int16_t GfxclkFoffset; + uint16_t Padding1; + uint16_t UclkFmin; + uint16_t UclkFmax; uint16_t FclkFmin; uint16_t FclkFmax; @@ -770,19 +786,23 @@ typedef struct { uint8_t MaxOpTemp; uint8_t AdvancedOdModeEnabled; - uint8_t Padding1[3]; + uint8_t Padding2[3]; uint16_t GfxVoltageFullCtrlMode; uint16_t SocVoltageFullCtrlMode; uint16_t GfxclkFullCtrlMode; uint16_t UclkFullCtrlMode; uint16_t FclkFullCtrlMode; - uint16_t Padding2; + uint16_t Padding3; int16_t GfxEdc; int16_t GfxPccLimitControl; - uint32_t Spare[10]; + uint16_t GfxclkFmaxVmax; + uint8_t GfxclkFmaxVmaxTemperature; + uint8_t Padding4[1]; + + uint32_t Spare[9]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -802,8 +822,8 @@ typedef struct { uint16_t VddSocVmax; //gfxclk - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFoffset; + uint16_t Padding; //uclk uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -828,7 +848,7 @@ typedef struct { uint8_t FanZeroRpmEnable; //temperature uint8_t MaxOpTemp; - uint8_t Padding[2]; + uint8_t Padding1[2]; //Full Ctrl uint16_t GfxVoltageFullCtrlMode; @@ -839,7 +859,7 @@ typedef struct { //EDC int16_t GfxEdc; int16_t GfxPccLimitControl; - int16_t Padding1; + int16_t Padding2; uint32_t Spare[5]; } OverDriveLimits_t; @@ -987,8 +1007,9 @@ typedef struct { uint16_t BaseClockDc; uint16_t GameClockDc; uint16_t BoostClockDc; - - uint32_t Reserved[4]; + uint16_t MaxReportedClock; + uint16_t Padding; + uint32_t Reserved[3]; } DriverReportedClocks_t; typedef struct { @@ -1132,7 +1153,7 @@ typedef struct { uint32_t DcModeMaxFreq [PPCLK_COUNT ]; // In MHz uint16_t GfxclkAibFmax; - uint16_t GfxclkFreqCap; + uint16_t GfxDpmPadding; //GFX Idle Power Settings uint16_t GfxclkFgfxoffEntry; // Entry in RLC stage (PLL), in Mhz @@ -1172,8 +1193,7 @@ typedef struct { uint32_t DvoFmaxLowScaler; //Unitless float // GFX DCS - uint16_t DcsGfxOffVoltage; //Voltage in mV(Q2) applied to VDDGFX when entering DCS GFXOFF phase - uint16_t PaddingDcs; + uint32_t PaddingDcs; uint16_t DcsMinGfxOffTime; //Minimum amount of time PMFW shuts GFX OFF as part of GFX DCS phase uint16_t DcsMaxGfxOffTime; //Maximum amount of time PMFW can shut GFX OFF as part of GFX DCS phase at a stretch. @@ -1205,8 +1225,7 @@ typedef struct { uint16_t DalDcModeMaxUclkFreq; uint8_t PaddingsMem[2]; //FCLK Section - uint16_t FclkDpmDisallowPstateFreq; //Frequency which FW will target when indicated that display config cannot support P-state. 
Set to 0 use FW calculated value - uint16_t PaddingFclk; + uint32_t PaddingFclk; // Link DPM Settings uint8_t PcieGenSpeed[NUM_LINK_LEVELS]; ///< 0:PciE-gen1 1:PciE-gen2 2:PciE-gen3 3:PciE-gen4 4:PciE-gen5 @@ -1215,12 +1234,19 @@ typedef struct { // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; - uint8_t GfxAvfsPadding[3]; + uint8_t GfxAvfsPadding[1]; + uint16_t DroopGBStDev; uint32_t SocHwRtAvfsFuses[PP_GRTAVFS_HW_FUSE_COUNT]; //new added for Soc domain uint32_t GfxL2HwRtAvfsFuses[PP_GRTAVFS_HW_FUSE_COUNT]; //see fusedoc for encoding //uint32_t GfxSeHwRtAvfsFuses[PP_GRTAVFS_HW_FUSE_COUNT]; - uint32_t spare_HwRtAvfsFuses[PP_GRTAVFS_HW_FUSE_COUNT]; + + uint16_t PsmDidt_Vcross[PP_NUM_PSM_DIDT_PWL_ZONES-1]; + uint32_t PsmDidt_StaticDroop_A[PP_NUM_PSM_DIDT_PWL_ZONES]; + uint32_t PsmDidt_StaticDroop_B[PP_NUM_PSM_DIDT_PWL_ZONES]; + uint32_t PsmDidt_DynDroop_A[PP_NUM_PSM_DIDT_PWL_ZONES]; + uint32_t PsmDidt_DynDroop_B[PP_NUM_PSM_DIDT_PWL_ZONES]; + uint32_t spare_HwRtAvfsFuses[19]; uint32_t SocCommonRtAvfs[PP_GRTAVFS_FW_COMMON_FUSE_COUNT]; uint32_t GfxCommonRtAvfs[PP_GRTAVFS_FW_COMMON_FUSE_COUNT]; @@ -1246,11 +1272,7 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 - uint32_t V2F_vmin_range_low; - uint32_t V2F_vmin_range_high; - uint32_t V2F_vmax_range_low; - uint32_t V2F_vmax_range_high; + uint32_t PaddingV2F[4]; AvfsDcBtcParams_t DcBtcGfxParams; QuadraticInt_t SSCurve_GFX; @@ -1327,18 +1349,18 @@ typedef struct { uint16_t PsmDidtReleaseTimer; uint32_t PsmDidtStallPattern; //Will be written to both pattern 1 and didt_static_level_prog // CAC EDC - uint32_t Leakage_C0; // in IEEE float - uint32_t Leakage_C1; // in IEEE float - uint32_t Leakage_C2; // in IEEE float - uint32_t Leakage_C3; // in IEEE float - uint32_t Leakage_C4; // in IEEE float - uint32_t Leakage_C5; // in IEEE float - uint32_t GFX_CLK_SCALAR; // in IEEE float - uint32_t GFX_CLK_INTERCEPT; // in IEEE float - uint32_t GFX_CAC_M; // in IEEE float - uint32_t GFX_CAC_B; // in IEEE float - uint32_t VDD_GFX_CurrentLimitGuardband; // in IEEE float - uint32_t DynToTotalCacScalar; // in IEEE + uint32_t CacEdcCacLeakageC0; + uint32_t CacEdcCacLeakageC1; + uint32_t CacEdcCacLeakageC2; + uint32_t CacEdcCacLeakageC3; + uint32_t CacEdcCacLeakageC4; + uint32_t CacEdcCacLeakageC5; + uint32_t CacEdcGfxClkScalar; + uint32_t CacEdcGfxClkIntercept; + uint32_t CacEdcCac_m; + uint32_t CacEdcCac_b; + uint32_t CacEdcCurrLimitGuardband; + uint32_t CacEdcDynToTotalCacRatio; // GFX EDC XVMIN uint32_t XVmin_Gfx_EdcThreshScalar; uint32_t XVmin_Gfx_EdcEnableFreq; @@ -1467,7 +1489,7 @@ typedef struct { uint8_t VddqOffEnabled; uint8_t PaddingUmcFlags[2]; - uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued + uint32_t Paddign1; uint32_t BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS uint8_t FuseWritePowerMuxPresent; @@ -1530,7 +1552,7 @@ typedef struct { int16_t FuzzyFan_ErrorSetDelta; int16_t FuzzyFan_ErrorRateSetDelta; int16_t FuzzyFan_PwmSetDelta; - uint16_t FuzzyFan_Reserved; + uint16_t FanPadding2; uint16_t FwCtfLimit[TEMP_COUNT]; @@ -1547,9 +1569,10 @@ typedef struct { uint16_t FanSpare[1]; uint8_t FanIntakeSensorSupport; uint8_t FanIntakePadding; - uint32_t FanAmbientPerfBoostThreshold; uint32_t FanSpare2[12]; + uint32_t ODFeatureCtrlMask; + uint16_t TemperatureLimit_Hynix; // In degrees Celsius. 
Memory temperature limit associated with Hynix uint16_t TemperatureLimit_Micron; // In degrees Celsius. Memory temperature limit associated with Micron uint16_t TemperatureFwCtfLimit_Hynix; @@ -1637,7 +1660,7 @@ typedef struct { uint16_t AverageDclk0Frequency ; uint16_t AverageVclk1Frequency ; uint16_t AverageDclk1Frequency ; - uint16_t PCIeBusy ; + uint16_t AveragePCIeBusy ; uint16_t dGPU_W_MAX ; uint16_t padding ; @@ -1665,12 +1688,12 @@ typedef struct { uint16_t AverageGfxActivity ; uint16_t AverageUclkActivity ; - uint16_t Vcn0ActivityPercentage ; + uint16_t AverageVcn0ActivityPercentage; uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; uint16_t AverageSocketPower; - uint16_t MovingAverageTotalBoardPower; + uint16_t AverageTotalBoardPower; uint16_t AvgTemperature[TEMP_COUNT]; uint16_t AvgTemperatureFanIntake; @@ -1684,7 +1707,8 @@ typedef struct { uint8_t ThrottlingPercentage[THROTTLER_COUNT]; - uint8_t padding1[3]; + uint8_t VmaxThrottlingPercentage; + uint8_t padding1[2]; //metrics for D3hot entry/exit and driver ARM msgs uint32_t D3HotEntryCountPerMode[D3HOT_SEQUENCE_COUNT]; @@ -1693,7 +1717,7 @@ typedef struct { uint16_t ApuSTAPMSmartShiftLimit; uint16_t ApuSTAPMLimit; - uint16_t MovingAvgApuSocketPower; + uint16_t AvgApuSocketPower; uint16_t AverageUclkActivity_MAX; @@ -1823,6 +1847,17 @@ typedef struct { #define TABLE_TRANSFER_FAILED 0xFF #define TABLE_TRANSFER_PENDING 0xAB +#define TABLE_PPT_FAILED 0x100 +#define TABLE_TDC_FAILED 0x200 +#define TABLE_TEMP_FAILED 0x400 +#define TABLE_FAN_TARGET_TEMP_FAILED 0x800 +#define TABLE_FAN_STOP_TEMP_FAILED 0x1000 +#define TABLE_FAN_START_TEMP_FAILED 0x2000 +#define TABLE_FAN_PWM_MIN_FAILED 0x4000 +#define TABLE_ACOUSTIC_TARGET_RPM_FAILED 0x8000 +#define TABLE_ACOUSTIC_LIMIT_RPM_FAILED 0x10000 +#define TABLE_MGPU_ACOUSTIC_TARGET_RPM_FAILED 0x20000 + // Table types #define TABLE_PPTABLE 0 #define TABLE_COMBO_PPTABLE 1 @@ -1849,5 +1884,6 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 #define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 #define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 +#define IH_INTERRUPT_CONTEXT_ID_DYNAMIC_TABLE 0xA #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 0b3c2f54a3433..822c6425d90e0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -123,7 +123,7 @@ typedef enum { VOLTAGE_GUARDBAND_COUNT } GFX_GUARDBAND_e; -#define SMU_METRICS_TABLE_VERSION 0xC +#define SMU_METRICS_TABLE_VERSION 0xD typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -227,6 +227,10 @@ typedef struct __attribute__((packed, aligned(4))) { // PCIE LINK Speed and width uint32_t PCIeLinkSpeed; uint32_t PCIeLinkWidth; + + // PER XCD ACTIVITY + uint32_t GfxBusy[8]; + uint64_t GfxBusyAcc[8]; } MetricsTableX_t; typedef struct __attribute__((packed, aligned(4))) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41cb681927e2f..fc7118b956774 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -93,7 +93,14 @@ #define PPSMC_MSG_SelectPLPDMode 0x40 #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SelectPstatePolicy 0x44 -#define PPSMC_Message_Count 0x45 +#define PPSMC_MSG_SetPhsDetWRbwThreshold 0x45 
+#define PPSMC_MSG_SetPhsDetWRbwFreqHigh 0x46 +#define PPSMC_MSG_SetPhsDetWRbwFreqLow 0x47 +#define PPSMC_MSG_SetPhsDetWRbwHystDown 0x48 +#define PPSMC_MSG_SetPhsDetWRbwAlpha 0x49 +#define PPSMC_MSG_SetPhsDetOnOff 0x4A +#define PPSMC_MSG_GetPhsDetResidency 0x4B +#define PPSMC_Message_Count 0x4C //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ffe..87ca5ceb1ece1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index ac0dd6b97f8d5..fa7449b289cac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -275,7 +275,14 @@ __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \ __SMU_DUMMY_MAP(SelectPstatePolicy), \ __SMU_DUMMY_MAP(MALLPowerController), \ - __SMU_DUMMY_MAP(MALLPowerState), + __SMU_DUMMY_MAP(MALLPowerState), \ + __SMU_DUMMY_MAP(SetPhsDetWRbwThreshold), \ + __SMU_DUMMY_MAP(SetPhsDetWRbwFreqHigh), \ + __SMU_DUMMY_MAP(SetPhsDetWRbwFreqLow), \ + __SMU_DUMMY_MAP(SetPhsDetWRbwHystDown), \ + __SMU_DUMMY_MAP(SetPhsDetWRbwAlpha), \ + __SMU_DUMMY_MAP(SetPhsDetOnOff), \ + __SMU_DUMMY_MAP(GetPhsDetResidency), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -439,7 +446,16 @@ enum smu_clk_type { __SMU_DUMMY_MAP(BACO_CG), \ __SMU_DUMMY_MAP(SOC_CG), \ __SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \ - __SMU_DUMMY_MAP(WHISPER_MODE), + __SMU_DUMMY_MAP(WHISPER_MODE), \ + __SMU_DUMMY_MAP(EDC_PWRBRK), \ + __SMU_DUMMY_MAP(SOC_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_PSM_DIDT), \ + __SMU_DUMMY_MAP(APT_ALL_ENABLE), \ + __SMU_DUMMY_MAP(APT_SQ_THROTTLE), \ + __SMU_DUMMY_MAP(APT_PF_DCS), \ + __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \ + __SMU_DUMMY_MAP(FAN_ABNORMAL), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(feature) 
SMU_FEATURE_##feature##_BIT diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 46b456590a080..727d5b405435d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -28,7 +28,7 @@ #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6 -#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x26 +#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x2E #define FEATURE_MASK(feature) (1ULL << feature) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9c3c48297cba0..727fed69fc38c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -250,7 +250,7 @@ static struct cmn2asic_mapping sienna_cichlid_workload_map[PP_SMC_POWER_PROFILE_ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 16fcd9dcd202e..8981302b19c8e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1616,7 +1616,8 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) break; default: if (!ras || !adev->ras_enabled || - adev->gmc.xgmi.pending_reset) { + (adev->init_lvl->level == + AMDGPU_INIT_LEVEL_MINIMAL_XGMI)) { if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 2)) { data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 22737b11b1bfb..0bb9636d14c38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -451,7 +451,11 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ +#ifdef HAVE_TOPOLOGY_NUM_CORES_PER_PACKAGE smu->cpu_core_num = topology_num_cores_per_package(); +#else + smu->cpu_core_num = boot_cpu_data.x86_max_cores; +#endif #else smu->cpu_core_num = 4; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e17466cc19522..48f448b9bc4f5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -527,6 +527,7 @@ int smu_v13_0_fini_smc_tables(struct smu_context *smu) smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; + kfree(smu_dpm->pd_ctl); kfree(smu_dpm->dpm_policies); kfree(smu_dpm->dpm_context); kfree(smu_dpm->golden_dpm_context); @@ -1022,8 +1023,7 @@ static int smu_v13_0_process_pending_interrupt(struct smu_context *smu) { int ret = 0; - if (smu->dc_controlled_by_gpio && - smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_ACDC_BIT)) ret = smu_v13_0_allow_ih_interrupt(smu); return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a887ab945dfa2..c9639141792f4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -3071,7 +3071,6 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .enable_mgpu_fan_boost = smu_v13_0_0_enable_mgpu_fan_boost, .get_power_limit = smu_v13_0_0_get_power_limit, .set_power_limit = smu_v13_0_0_set_power_limit, - .set_power_source = smu_v13_0_set_power_source, .get_power_profile_mode = smu_v13_0_0_get_power_profile_mode, .set_power_profile_mode = smu_v13_0_0_set_power_profile_mode, .run_btc = smu_v13_0_run_btc, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff6..a91a39bc44c3b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -102,6 +102,12 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define MCA_BANK_IPID(_ip, _hwid, _type) \ [AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, } +static inline bool smu_v13_0_6_is_unified_metrics(struct smu_context *smu) +{ + return (smu->adev->flags & AMD_IS_APU) && + smu->smc_fw_version <= 0x4556900; +} + struct mca_bank_ipid { enum amdgpu_mca_ip ip; uint16_t hwid; @@ -121,6 +127,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -175,6 +182,13 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SelectPstatePolicy, PPSMC_MSG_SelectPstatePolicy, 0), + MSG_MAP(SetPhsDetWRbwThreshold, PPSMC_MSG_SetPhsDetWRbwThreshold, 0), + MSG_MAP(SetPhsDetWRbwFreqHigh, PPSMC_MSG_SetPhsDetWRbwFreqHigh, 0), + MSG_MAP(SetPhsDetWRbwFreqLow, PPSMC_MSG_SetPhsDetWRbwFreqLow, 0), + MSG_MAP(SetPhsDetWRbwHystDown, PPSMC_MSG_SetPhsDetWRbwHystDown, 0), + MSG_MAP(SetPhsDetWRbwAlpha, PPSMC_MSG_SetPhsDetWRbwAlpha, 0), + MSG_MAP(SetPhsDetOnOff, PPSMC_MSG_SetPhsDetOnOff, 0), + MSG_MAP(GetPhsDetResidency, PPSMC_MSG_GetPhsDetResidency, 0), }; // clang-format on @@ -252,7 +266,7 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) -#define GET_METRIC_FIELD(field) ((adev->flags & AMD_IS_APU) ?\ +#define GET_METRIC_FIELD(field, flag) ((flag) ?\ (metrics_a->field) : (metrics_x->field)) struct smu_v13_0_6_dpm_map { @@ -271,14 +285,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); @@ -347,7 +365,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_5); + smu_table->gpu_metrics_table_size = 
sizeof(struct gpu_metrics_v1_6); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -434,6 +452,132 @@ static int smu_v13_0_6_select_plpd_policy(struct smu_context *smu, int level) return ret; } +static int smu_v13_0_6_phase_det_set(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, + uint32_t val) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + uint32_t *param; + int r, msg_id; + + pd_ctl = smu_dpm->pd_ctl; + if (!pd_ctl) + return -EINVAL; + + switch (id) { + case PP_PM_PHASE_DET_LO_FREQ: + msg_id = SMU_MSG_SetPhsDetWRbwFreqLow; + param = &pd_ctl->params.freq_lo; + break; + case PP_PM_PHASE_DET_HI_FREQ: + msg_id = SMU_MSG_SetPhsDetWRbwFreqHigh; + param = &pd_ctl->params.freq_hi; + break; + case PP_PM_PHASE_DET_THRESH: + msg_id = SMU_MSG_SetPhsDetWRbwThreshold; + param = &pd_ctl->params.thresh; + break; + case PP_PM_PHASE_DET_ALPHA: + msg_id = SMU_MSG_SetPhsDetWRbwAlpha; + param = &pd_ctl->params.alpha; + break; + case PP_PM_PHASE_DET_HYST: + msg_id = SMU_MSG_SetPhsDetWRbwHystDown; + param = &pd_ctl->params.hyst; + break; + default: + return -EINVAL; + } + + r = smu_cmn_send_smc_msg_with_param(smu, msg_id, val, NULL); + if (!r) + *param = val; + + return r; +} + +static int smu_v13_0_6_phase_det_get(struct smu_context *smu, + enum pp_pm_phase_det_param_id id, + uint32_t *val) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = smu_dpm->pd_ctl; + if (!pd_ctl || !val) + return -EINVAL; + + switch (id) { + case PP_PM_PHASE_DET_LO_FREQ: + *val = pd_ctl->params.freq_lo; + break; + case PP_PM_PHASE_DET_HI_FREQ: + *val = pd_ctl->params.freq_hi; + break; + case PP_PM_PHASE_DET_THRESH: + *val = pd_ctl->params.thresh; + break; + case PP_PM_PHASE_DET_ALPHA: + *val = pd_ctl->params.alpha; + break; + case PP_PM_PHASE_DET_HYST: + *val = pd_ctl->params.hyst; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int smu_v13_0_6_phase_det_enable(struct smu_context *smu, bool enable) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + int r; + + pd_ctl = smu_dpm->pd_ctl; + r = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPhsDetOnOff, enable, + NULL); + + if (!r) { + pd_ctl->status = enable ? SMU_PHASE_DET_ON : SMU_PHASE_DET_OFF; + } else { + dev_warn(smu->adev->dev, "Phase detect %s failed", + enable ? 
"enable" : "disable"); + pd_ctl->status = SMU_PHASE_DET_DISABLED; + } + + return r; +} + +static int smu_v13_0_6_phase_det_get_residency(struct smu_context *smu, + uint32_t *res) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + + pd_ctl = smu_dpm->pd_ctl; + + if (!res) + return -EINVAL; + + if (pd_ctl->status != SMU_PHASE_DET_ON) { + *res = 0; + return 0; + } + + return smu_cmn_send_smc_msg(smu, SMU_MSG_GetPhsDetResidency, res); +} + +static struct smu_phase_det_ops smu_v13_0_6_pd_ops = { + .set = smu_v13_0_6_phase_det_set, + .get = smu_v13_0_6_phase_det_get, + .enable = smu_v13_0_6_phase_det_enable, + .get_residency = smu_v13_0_6_phase_det_get_residency, +}; + static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; @@ -452,6 +596,22 @@ static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu) return -ENOMEM; } + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) && + !(smu->adev->flags & AMD_IS_APU)) { + smu_dpm->pd_ctl = + kzalloc(sizeof(struct smu_phase_det_ctl), GFP_KERNEL); + if (!smu_dpm->pd_ctl) { + kfree(smu_dpm->dpm_policies); + kfree(smu_dpm->dpm_context); + return -ENOMEM; + } + + smu_dpm->pd_ctl->ops = &smu_v13_0_6_pd_ops; + smu_dpm->pd_ctl->status = SMU_PHASE_DET_OFF; + /* Init to 0xFF to indicate that present values are unknown */ + memset(&smu_dpm->pd_ctl->params, 0xFF, + sizeof(struct smu_phase_det_params)); + } if (!(smu->adev->flags & AMD_IS_APU)) { policy = &(smu_dpm->dpm_policies->policies[0]); @@ -578,7 +738,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - struct amdgpu_device *adev = smu->adev; + bool flag = smu_v13_0_6_is_unified_metrics(smu); int ret, i, retry = 100; uint32_t table_version; @@ -590,7 +750,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (GET_METRIC_FIELD(AccumulationCounter)) + if (GET_METRIC_FIELD(AccumulationCounter, flag)) break; usleep_range(1000, 1100); @@ -607,29 +767,29 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) table_version; pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, flag)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, flag)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency)); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, flag)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, flag)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, flag)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - GET_METRIC_FIELD(SocclkFrequencyTable)[i]); + GET_METRIC_FIELD(SocclkFrequencyTable, flag)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, flag)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, flag)[i]); 
pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, flag)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID)[0]; + pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID, flag)[0]; pptable->Init = true; } @@ -749,6 +909,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_table_context *smu_table = &smu->smu_table; struct smu_13_0_dpm_table *dpm_table = NULL; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; uint32_t gfxclkmin, gfxclkmax, levels; @@ -782,6 +943,14 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) ~BIT(PP_PM_POLICY_SOC_PSTATE); } + if (smu_dpm->pd_ctl && (smu->smc_fw_version < 0x00556E00)) { + kfree(smu_dpm->pd_ctl); + smu_dpm->pd_ctl = NULL; + } + + if (smu_dpm->pd_ctl && (smu->smc_fw_version < 0x00556F78)) + smu_dpm->pd_ctl->ops->get_residency = NULL; + smu_v13_0_6_pm_policy_init(smu); /* gfxclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.gfx_table; @@ -952,6 +1121,7 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; + bool flag = smu_v13_0_6_is_unified_metrics(smu); struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -966,50 +1136,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu->smc_fw_version >= 0x552F00) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, flag)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, flag)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, flag)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, flag)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, flag)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, flag)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)) * + *value = 
SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, flag)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. */ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, flag)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -2102,8 +2272,12 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, } mutex_lock(&adev->pm.mutex); r = smu_v13_0_6_request_i2c_xfer(smu, req); - if (r) - goto fail; + if (r) { + /* Retry once, in case of an i2c collision */ + r = smu_v13_0_6_request_i2c_xfer(smu, req); + if (r) + goto fail; + } for (c = i = 0; i < num_msgs; i++) { if (!(msg[i].flags & I2C_M_RD)) { @@ -2290,14 +2464,18 @@ static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) { + bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_5 *gpu_metrics = - (struct gpu_metrics_v1_5 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_6 *gpu_metrics = + (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table; + bool flag = smu_v13_0_6_is_unified_metrics(smu); + int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; - int ret = 0, xcc_id, inst, i, j; MetricsTableX_t *metrics_x; MetricsTableA_t *metrics_a; + struct amdgpu_xcp *xcp; u16 link_width_level; + u32 inst_mask; metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); @@ -2308,53 +2486,60 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_a = (MetricsTableA_t *)metrics_x; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 5); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, flag)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, flag)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, flag)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization)); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc); + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, flag); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - 
SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency)[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, flag)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency)[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, flag)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency)[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, flag)[inst]); } } } - gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency)); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, flag)); - /* Throttle status is not reported through metrics now */ - gpu_metrics->throttle_status = 0; + /* Total accumulated cycle counter */ + gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, flag); + + /* Accumulated throttler residencies */ + gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, flag); + gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, flag); + gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, flag); + gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, flag); + gpu_metrics->hbm_thm_residency_acc = GET_METRIC_FIELD(HbmThmResidencyAcc, flag); /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak) >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, flag) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { /*Check smu version, PCIE link speed and width will be reported from pmfw metric @@ -2395,36 +2580,68 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, flag)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc)); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, flag)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc)[i]); - } + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]); + } + + gpu_metrics->num_partition = adev->xcp_mgr->num_xcps; + + apu_per_inst = (adev->flags & AMD_IS_APU) && (smu->smc_fw_version >= 0x04556A00); + smu_13_0_6_per_inst = !(adev->flags & AMD_IS_APU) && + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 6)) && + (smu->smc_fw_version >= 0x556F00); + smu_13_0_14_per_inst = !(adev->flags & AMD_IS_APU) && + (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 14)) && + (smu->smc_fw_version >= 0x05550B00); + + per_inst = apu_per_inst || smu_13_0_6_per_inst || smu_13_0_14_per_inst; + + for_each_xcp(adev->xcp_mgr, xcp, i) { + amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); + idx = 0; + for_each_inst(k, inst_mask) { + /* Both JPEG and VCN have the same instances */ + inst = GET_INST(VCN, k); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + gpu_metrics->xcp_stats[i].jpeg_busy +
[(idx * adev->jpeg.num_jpeg_rings) + j] = + SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy, flag) + [(inst * adev->jpeg.num_jpeg_rings) + j]); + } + gpu_metrics->xcp_stats[i].vcn_busy[idx] = + SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, flag)[inst]); + idx++; - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - inst = GET_INST(JPEG, i); - for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - gpu_metrics->jpeg_activity[(i * adev->jpeg.num_jpeg_rings) + j] = - SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy) - [(inst * adev->jpeg.num_jpeg_rings) + j]); } - } - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - inst = GET_INST(VCN, i); - gpu_metrics->vcn_activity[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy)[inst]); + if (per_inst) { + amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask); + idx = 0; + for_each_inst(k, inst_mask) { + inst = GET_INST(GC, k); + gpu_metrics->xcp_stats[i].gfx_busy_inst[idx] = + SMUQ10_ROUND(metrics_x->GfxBusy[inst]); + gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = + SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]); + idx++; + } + } } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth)); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate)); + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, flag)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate, flag)); - gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp); + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, flag); *table = (void *)gpu_metrics; kfree(metrics_x); @@ -2638,6 +2855,23 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) return ret; } +static int smu_v13_0_6_post_init(struct smu_context *smu) +{ + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_phase_det_ctl *pd_ctl; + bool enable; + + pd_ctl = smu_dpm->pd_ctl; + + if (!pd_ctl || pd_ctl->status == SMU_PHASE_DET_DISABLED) + return 0; + + enable = (pd_ctl->status == SMU_PHASE_DET_ON) ? true : false; + smu_v13_0_6_phase_det_enable(smu, enable); + + return 0; +} + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3283,6 +3517,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, .send_rma_reason = smu_v13_0_6_send_rma_reason, + .post_init = smu_v13_0_6_post_init, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index a7d0231727e8f..7bc95c4043778 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2378,7 +2378,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf size += sysfs_emit_at(buf, size, " "); for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++) - size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i], + size += sysfs_emit_at(buf, size, "%d %-14s%s", i, amdgpu_pp_profile_name[i], (i == smu->power_profile_mode) ? 
"* " : " "); size += sysfs_emit_at(buf, size, "\n"); @@ -2408,7 +2408,7 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf do { \ size += sysfs_emit_at(buf, size, "%-30s", #field); \ for (j = 0; j <= PP_SMC_POWER_PROFILE_WINDOW3D; j++) \ - size += sysfs_emit_at(buf, size, "%-16d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ + size += sysfs_emit_at(buf, size, "%-18d", activity_monitor_external[j].DpmActivityMonitorCoeffInt.field); \ size += sysfs_emit_at(buf, size, "\n"); \ } while (0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 5913f9c60fe00..882e51044dfb6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -127,7 +127,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), @@ -200,6 +199,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + FEA_MAP(EDC_PWRBRK), + FEA_MAP(SOC_EDC_XVMIN), + FEA_MAP(GFX_PSM_DIDT), + FEA_MAP(APT_ALL_ENABLE), + FEA_MAP(APT_SQ_THROTTLE), + FEA_MAP(APT_PF_DCS), + FEA_MAP(GFX_EDC_XVMIN), + FEA_MAP(GFX_DIDT_XVMIN), + FEA_MAP(FAN_ABNORMAL), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, @@ -688,6 +696,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) pcie_table->clk_freq[pcie_table->num_of_link_levels] = skutable->LclkFreq[link_level]; pcie_table->num_of_link_levels++; + + if (link_level == 0) + link_level++; } /* dcefclk dpm table setup */ @@ -1066,12 +1077,9 @@ static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, switch (od_feature_bit) { case PP_OD_FEATURE_GFXCLK_FMIN: - od_min_setting = overdrive_lowerlimits->GfxclkFmin; - od_max_setting = overdrive_upperlimits->GfxclkFmin; - break; case PP_OD_FEATURE_GFXCLK_FMAX: - od_min_setting = overdrive_lowerlimits->GfxclkFmax; - od_max_setting = overdrive_upperlimits->GfxclkFmax; + od_min_setting = overdrive_lowerlimits->GfxclkFoffset; + od_max_setting = overdrive_upperlimits->GfxclkFoffset; break; case PP_OD_FEATURE_UCLK_FMIN: od_min_setting = overdrive_lowerlimits->UclkFmin; @@ -1258,10 +1266,16 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, PP_OD_FEATURE_GFXCLK_BIT)) break; - size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); - size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", - od_table->OverDriveTable.GfxclkFmin, - od_table->OverDriveTable.GfxclkFmax); + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + const OverDriveLimits_t * const overdrive_lowerlimits = + &pptable->SkuTable.OverDriveLimitsBasicMin; + + size += sysfs_emit_at(buf, size, "OD_SCLK_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "0: %dMhz\n1: %uMhz\n", + overdrive_lowerlimits->GfxclkFoffset, + overdrive_upperlimits->GfxclkFoffset); 
break; case SMU_OD_MCLK: @@ -1403,7 +1417,7 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, PP_OD_FEATURE_GFXCLK_FMAX, NULL, &max_value); - size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "SCLK_OFFSET: %7dMhz %10uMhz\n", min_value, max_value); } @@ -1785,7 +1799,7 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - + uint32_t current_profile_mode = smu->power_profile_mode; smu->power_profile_mode = input[size]; if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) { @@ -1843,6 +1857,11 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, } } + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + smu_v14_0_deep_sleep_control(smu, false); + else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) + smu_v14_0_deep_sleep_control(smu, true); + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, @@ -2115,50 +2134,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -2187,7 +2162,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; - gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + gpu_metrics->average_mm_activity = max(metrics->AverageVcn0ActivityPercentage, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; @@ -2246,8 +2221,7 @@ static void smu_v14_0_2_dump_od_table(struct smu_context *smu, { struct amdgpu_device *adev = smu->adev; - dev_dbg(adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->OverDriveTable.GfxclkFmin, - od_table->OverDriveTable.GfxclkFmax); + dev_dbg(adev->dev, "OD: Gfxclk offset: (%d)\n", od_table->OverDriveTable.GfxclkFoffset); dev_dbg(adev->dev, "OD: Uclk: (%d, %d)\n", od_table->OverDriveTable.UclkFmin, od_table->OverDriveTable.UclkFmax); } @@ -2338,10 +2312,8 @@ static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) memcpy(user_od_table, boot_od_table, sizeof(OverDriveTableExternal_t)); - user_od_table->OverDriveTable.GfxclkFmin = - 
user_od_table_bak.OverDriveTable.GfxclkFmin; - user_od_table->OverDriveTable.GfxclkFmax = - user_od_table_bak.OverDriveTable.GfxclkFmax; + user_od_table->OverDriveTable.GfxclkFoffset = + user_od_table_bak.OverDriveTable.GfxclkFoffset; user_od_table->OverDriveTable.UclkFmin = user_od_table_bak.OverDriveTable.UclkFmin; user_od_table->OverDriveTable.UclkFmax = @@ -2470,22 +2442,6 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, } switch (input[i]) { - case 0: - smu_v14_0_2_get_od_setting_limits(smu, - PP_OD_FEATURE_GFXCLK_FMIN, - &minimum, - &maximum); - if (input[i + 1] < minimum || - input[i + 1] > maximum) { - dev_info(adev->dev, "GfxclkFmin (%ld) must be within [%u, %u]!\n", - input[i + 1], minimum, maximum); - return -EINVAL; - } - - od_table->OverDriveTable.GfxclkFmin = input[i + 1]; - od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; - break; - case 1: smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_GFXCLK_FMAX, @@ -2498,7 +2454,7 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, return -EINVAL; } - od_table->OverDriveTable.GfxclkFmax = input[i + 1]; + od_table->OverDriveTable.GfxclkFoffset = input[i + 1]; od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; break; @@ -2509,13 +2465,6 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, } } - if (od_table->OverDriveTable.GfxclkFmin > od_table->OverDriveTable.GfxclkFmax) { - dev_err(adev->dev, - "Invalid setting: GfxclkFmin(%u) is bigger than GfxclkFmax(%u)\n", - (uint32_t)od_table->OverDriveTable.GfxclkFmin, - (uint32_t)od_table->OverDriveTable.GfxclkFmax); - return -EINVAL; - } break; case PP_OD_EDIT_MCLK_VDDC_TABLE: @@ -2877,7 +2826,6 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, .set_power_limit = smu_v14_0_2_set_power_limit, - .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, .run_btc = smu_v14_0_run_btc, @@ -2897,12 +2845,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 88eefef05faed..63c4f75fa1183 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1078,6 +1078,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 5): structure_size = sizeof(struct gpu_metrics_v1_5); break; + case METRICS_VERSION(1, 6): + structure_size = sizeof(struct gpu_metrics_v1_6); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index fc2ceae61db2d..c4a662ae7e884 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -89,7 +89,7 @@ static int 
drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb, struct drm_dp_mst_port *port); static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, - u8 *guid); + guid_t *guid); static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port); static void drm_dp_mst_unregister_i2c_bus(struct drm_dp_mst_port *port); @@ -801,7 +801,7 @@ static bool drm_dp_sideband_parse_link_address(const struct drm_dp_mst_topology_ int idx = 1; int i; - memcpy(repmsg->u.link_addr.guid, &raw->msg[idx], 16); + import_guid(&repmsg->u.link_addr.guid, &raw->msg[idx]); idx += 16; repmsg->u.link_addr.nports = raw->msg[idx] & 0xf; idx++; @@ -829,7 +829,7 @@ static bool drm_dp_sideband_parse_link_address(const struct drm_dp_mst_topology_ idx++; if (idx > raw->curlen) goto fail_len; - memcpy(repmsg->u.link_addr.ports[i].peer_guid, &raw->msg[idx], 16); + import_guid(&repmsg->u.link_addr.ports[i].peer_guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -1029,7 +1029,7 @@ static bool drm_dp_sideband_parse_reply(const struct drm_dp_mst_topology_mgr *mg msg->req_type = (raw->msg[0] & 0x7f); if (msg->reply_type == DP_SIDEBAND_REPLY_NAK) { - memcpy(msg->u.nak.guid, &raw->msg[1], 16); + import_guid(&msg->u.nak.guid, &raw->msg[1]); msg->u.nak.reason = raw->msg[17]; msg->u.nak.nak_data = raw->msg[18]; return false; @@ -1078,7 +1078,7 @@ drm_dp_sideband_parse_connection_status_notify(const struct drm_dp_mst_topology_ if (idx > raw->curlen) goto fail_len; - memcpy(msg->u.conn_stat.guid, &raw->msg[idx], 16); + import_guid(&msg->u.conn_stat.guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -1107,7 +1107,7 @@ static bool drm_dp_sideband_parse_resource_status_notify(const struct drm_dp_mst if (idx > raw->curlen) goto fail_len; - memcpy(msg->u.resource_stat.guid, &raw->msg[idx], 16); + import_guid(&msg->u.resource_stat.guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -2174,20 +2174,24 @@ ssize_t drm_dp_mst_dpcd_write(struct drm_dp_aux *aux, offset, size, buffer); } -static int drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid) +static int drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, guid_t *guid) { int ret = 0; - memcpy(mstb->guid, guid, 16); + guid_copy(&mstb->guid, guid); + + if (!drm_dp_validate_guid(mstb->mgr, &mstb->guid)) { + u8 buf[UUID_SIZE]; + + export_guid(buf, &mstb->guid); - if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) { if (mstb->port_parent) { ret = drm_dp_send_dpcd_write(mstb->mgr, mstb->port_parent, - DP_GUID, 16, mstb->guid); + DP_GUID, sizeof(buf), buf); } else { ret = drm_dp_dpcd_write(mstb->mgr->aux, - DP_GUID, mstb->guid, 16); + DP_GUID, buf, sizeof(buf)); } } @@ -2570,9 +2574,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ return mstb; } -static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( - struct drm_dp_mst_branch *mstb, - const uint8_t *guid) +static struct drm_dp_mst_branch * +get_mst_branch_device_by_guid_helper(struct drm_dp_mst_branch *mstb, + const guid_t *guid) { struct drm_dp_mst_branch *found_mstb; struct drm_dp_mst_port *port; @@ -2580,10 +2584,9 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( if (!mstb) return NULL; - if (memcmp(mstb->guid, guid, 16) == 0) + if (guid_equal(&mstb->guid, guid)) return mstb; - list_for_each_entry(port, &mstb->ports, next) { found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); @@ -2596,7 +2599,7 @@ static struct 
drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( static struct drm_dp_mst_branch * drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr, - const uint8_t *guid) + const guid_t *guid) { struct drm_dp_mst_branch *mstb; int ret; @@ -2693,17 +2696,12 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) } static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, - u8 *guid) + guid_t *guid) { - u64 salt; - - if (memchr_inv(guid, 0, 16)) + if (!guid_is_null(guid)) return true; - salt = get_jiffies_64(); - - memcpy(&guid[0], &salt, sizeof(u64)); - memcpy(&guid[8], &salt, sizeof(u64)); + guid_gen(guid); return false; } @@ -2943,7 +2941,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, drm_dbg_kms(mgr->dev, "link address reply: %d\n", reply->nports); drm_dp_dump_link_address(mgr, reply); - ret = drm_dp_check_mstb_guid(mstb, reply->guid); + ret = drm_dp_check_mstb_guid(mstb, &reply->guid); if (ret) { char buf[64]; @@ -3770,8 +3768,9 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, bool sync) { + u8 buf[UUID_SIZE]; + guid_t guid; int ret; - u8 guid[16]; mutex_lock(&mgr->lock); if (!mgr->mst_primary) @@ -3792,13 +3791,15 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, } /* Some hubs forget their guids after they resume */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); - if (ret != 16) { + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf)); + if (ret != sizeof(buf)) { drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); goto out_fail; } - ret = drm_dp_check_mstb_guid(mgr->mst_primary, guid); + import_guid(&guid, buf); + + ret = drm_dp_check_mstb_guid(mgr->mst_primary, &guid); if (ret) { drm_dbg_kms(mgr->dev, "check mstb failed - undocked during suspend?\n"); goto out_fail; @@ -3976,12 +3977,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, bool hotplug = false, dowork = false; if (hdr->broadcast) { - const u8 *guid = NULL; + const guid_t *guid = NULL; if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) - guid = msg->u.conn_stat.guid; + guid = &msg->u.conn_stat.guid; else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY) - guid = msg->u.resource_stat.guid; + guid = &msg->u.resource_stat.guid; if (guid) mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 103c185bb1c8a..ca42e6081d27c 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -324,7 +324,7 @@ EXPORT_SYMBOL(drm_buddy_init); */ void drm_buddy_fini(struct drm_buddy *mm) { - u64 root_size, size; + u64 root_size, size, start; unsigned int order; int i; @@ -332,7 +332,8 @@ void drm_buddy_fini(struct drm_buddy *mm) for (i = 0; i < mm->n_roots; ++i) { order = ilog2(size) - ilog2(mm->chunk_size); - __force_merge(mm, 0, size, order); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); WARN_ON(!drm_buddy_block_is_free(mm->roots[i])); drm_block_free(mm, mm->roots[i]); diff --git a/drivers/gpu/drm/drm_fbdev_ttm.c b/drivers/gpu/drm/drm_fbdev_ttm.c index bb7898cd7dc63..119ffb28aaf95 100644 --- a/drivers/gpu/drm/drm_fbdev_ttm.c +++ b/drivers/gpu/drm/drm_fbdev_ttm.c @@ -84,7 +84,8 @@ static int drm_fbdev_ttm_helper_fb_probe(struct drm_fb_helper *fb_helper, sizes->surface_width, sizes->surface_height, sizes->surface_bpp); - format = drm_mode_legacy_fb_format(sizes->surface_bpp, 
sizes->surface_depth); + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, + sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3d127127e7cb0..3f84d7527793e 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -420,13 +420,20 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), }, .driver_data = (void *)&lcd1280x1920_rightside_up, - }, { /* Valve Steam Deck */ + }, { /* Valve Steam Deck (Jupiter) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Valve Steam Deck (Galileo) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galileo"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 03bd3c7bd0dc2..0e3f8adf162f6 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -410,22 +410,30 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, } /** - * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC - * @prime_fd: pointer to storage for the fd id of the create dma-buf * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. + * + * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it + * has created, without attaching it to any file descriptors. The difference + * between those two is similar to that between anon_inode_getfile() and + * anon_inode_getfd(); insertion into descriptor table is something you + * can not revert if any cleanup is needed, so the descriptor-returning + * variants should only be used when you are past the last failure exit + * and the only thing left is passing the new file descriptor to userland. + * When all you need is the object itself or when you need to do something + * else that might fail, use that one instead. 
*/ -int drm_gem_prime_handle_to_fd(struct drm_device *dev, +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, - uint32_t flags, - int *prime_fd) + uint32_t flags) { struct drm_gem_object *obj; int ret = 0; @@ -434,14 +442,14 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, mutex_lock(&file_priv->prime.lock); obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { - ret = -ENOENT; + dmabuf = ERR_PTR(-ENOENT); goto out_unlock; } dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle); if (dmabuf) { get_dma_buf(dmabuf); - goto out_have_handle; + goto out; } mutex_lock(&dev->object_name_lock); @@ -463,7 +471,6 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ - ret = PTR_ERR(dmabuf); mutex_unlock(&dev->object_name_lock); goto out; } @@ -478,34 +485,51 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, ret = drm_prime_add_buf_handle(&file_priv->prime, dmabuf, handle); mutex_unlock(&dev->object_name_lock); - if (ret) - goto fail_put_dmabuf; - -out_have_handle: - ret = dma_buf_fd(dmabuf, flags); - /* - * We must _not_ remove the buffer from the handle cache since the newly - * created dma buf is already linked in the global obj->dma_buf pointer, - * and that is invariant as long as a userspace gem handle exists. - * Closing the handle will clean out the cache anyway, so we don't leak. - */ - if (ret < 0) { - goto fail_put_dmabuf; - } else { - *prime_fd = ret; - ret = 0; + if (ret) { + dma_buf_put(dmabuf); + dmabuf = ERR_PTR(ret); } - - goto out; - -fail_put_dmabuf: - dma_buf_put(dmabuf); out: drm_gem_object_put(obj); out_unlock: mutex_unlock(&file_priv->prime.lock); + return dmabuf; +} +EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf); - return ret; +/** + * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * @dev: dev to export the buffer from + * @file_priv: drm file-private structure + * @handle: buffer handle to export + * @flags: flags like DRM_CLOEXEC + * @prime_fd: pointer to storage for the fd id of the created dma-buf + * + * This is the PRIME export function which must be used mandatorily by GEM + * drivers to ensure correct lifetime management of the underlying GEM object. + * The actual exporting from GEM object to a dma-buf is done through the + * &drm_gem_object_funcs.export callback. + */ +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd) +{ + struct dma_buf *dmabuf; + int fd = get_unused_fd_flags(flags); + + if (fd < 0) + return fd; + + dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags); + if (IS_ERR(dmabuf)) { + put_unused_fd(fd); + return PTR_ERR(dmabuf); + } + + fd_install(fd, dmabuf->file); + *prime_fd = fd; + return 0; } EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index cc3571e25a9ac..c6b4cd77df729 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -131,7 +131,7 @@ * guaranteed to be enabled. * * On many hardware disabling the vblank interrupt cannot be done in a race-free - * manner, see &drm_driver.vblank_disable_immediate and + * manner, see &drm_vblank_crtc_config.disable_immediate and &drm_driver.max_vblank_count. 
In that case the vblank core only disables the * vblanks after a timer has expired, which can be configured through the * ``vblankoffdelay`` module parameter. @@ -1241,6 +1241,7 @@ EXPORT_SYMBOL(drm_crtc_vblank_get); void drm_vblank_put(struct drm_device *dev, unsigned int pipe) { struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + int vblank_offdelay = vblank->config.offdelay_ms; if (drm_WARN_ON(dev, pipe >= dev->num_crtcs)) return; @@ -1250,13 +1251,13 @@ void drm_vblank_put(struct drm_device *dev, unsigned int pipe) /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&vblank->refcount)) { - if (drm_vblank_offdelay == 0) + if (!vblank_offdelay) return; - else if (drm_vblank_offdelay < 0) + else if (vblank_offdelay < 0) vblank_disable_fn(&vblank->disable_timer); - else if (!dev->vblank_disable_immediate) + else if (!vblank->config.disable_immediate) mod_timer(&vblank->disable_timer, - jiffies + ((drm_vblank_offdelay * HZ)/1000)); + jiffies + ((vblank_offdelay * HZ) / 1000)); } } @@ -1265,7 +1266,8 @@ void drm_vblank_put(struct drm_device *dev, unsigned int pipe) * @crtc: which counter to give up * * Release ownership of a given vblank counter, turning off interrupts - * if possible. Disable interrupts after drm_vblank_offdelay milliseconds. + * if possible. Disable interrupts after &drm_vblank_crtc_config.offdelay_ms + * milliseconds. */ void drm_crtc_vblank_put(struct drm_crtc *crtc) { @@ -1466,16 +1468,20 @@ void drm_crtc_set_max_vblank_count(struct drm_crtc *crtc, EXPORT_SYMBOL(drm_crtc_set_max_vblank_count); /** - * drm_crtc_vblank_on - enable vblank events on a CRTC + * drm_crtc_vblank_on_config - enable vblank events on a CRTC with custom + * configuration options * @crtc: CRTC in question + * @config: Vblank configuration value * - * This functions restores the vblank interrupt state captured with - * drm_crtc_vblank_off() again and is generally called when enabling @crtc. Note - * that calls to drm_crtc_vblank_on() and drm_crtc_vblank_off() can be - * unbalanced and so can also be unconditionally called in driver load code to - * reflect the current hardware state of the crtc. + * See drm_crtc_vblank_on(). In addition, this function allows you to provide a + * custom vblank configuration for a given CRTC. + * + * Note that @config is copied, the pointer does not need to stay valid beyond + * this function call. For details of the parameters see + * struct drm_vblank_crtc_config. */ -void drm_crtc_vblank_on(struct drm_crtc *crtc) +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config) { struct drm_device *dev = crtc->dev; unsigned int pipe = drm_crtc_index(crtc); @@ -1488,6 +1494,8 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc) drm_dbg_vbl(dev, "crtc %d, vblank enabled %d, inmodeset %d\n", pipe, vblank->enabled, vblank->inmodeset); + vblank->config = *config; + /* Drop our private "prevent drm_vblank_get" refcount */ if (vblank->inmodeset) { atomic_dec(&vblank->refcount); @@ -1500,10 +1508,33 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc) * re-enable interrupts if there are users left, or the * user wishes vblank interrupts to be enabled all the time. 
*/ - if (atomic_read(&vblank->refcount) != 0 || drm_vblank_offdelay == 0) + if (atomic_read(&vblank->refcount) != 0 || !vblank->config.offdelay_ms) drm_WARN_ON(dev, drm_vblank_enable(dev, pipe)); spin_unlock_irq(&dev->vbl_lock); } +EXPORT_SYMBOL(drm_crtc_vblank_on_config); + +/** + * drm_crtc_vblank_on - enable vblank events on a CRTC + * @crtc: CRTC in question + * + * This function restores the vblank interrupt state captured with + * drm_crtc_vblank_off() again and is generally called when enabling @crtc. Note + * that calls to drm_crtc_vblank_on() and drm_crtc_vblank_off() can be + * unbalanced and so can also be unconditionally called in driver load code to + * reflect the current hardware state of the crtc. + * + * Note that unlike in drm_crtc_vblank_on_config(), default values are used. + */ +void drm_crtc_vblank_on(struct drm_crtc *crtc) +{ + const struct drm_vblank_crtc_config config = { + .offdelay_ms = drm_vblank_offdelay, + .disable_immediate = crtc->dev->vblank_disable_immediate + }; + + drm_crtc_vblank_on_config(crtc, &config); +} EXPORT_SYMBOL(drm_crtc_vblank_on); static void drm_vblank_restore(struct drm_device *dev, unsigned int pipe) @@ -1556,16 +1587,21 @@ static void drm_vblank_restore(struct drm_device *dev, unsigned int pipe) * * Note that drivers must have race-free high-precision timestamping support, * i.e. &drm_crtc_funcs.get_vblank_timestamp must be hooked up and - * &drm_driver.vblank_disable_immediate must be set to indicate the + * &drm_vblank_crtc_config.disable_immediate must be set to indicate the * time-stamping functions are race-free against vblank hardware counter * increments. */ void drm_crtc_vblank_restore(struct drm_crtc *crtc) { - WARN_ON_ONCE(!crtc->funcs->get_vblank_timestamp); - WARN_ON_ONCE(!crtc->dev->vblank_disable_immediate); + struct drm_device *dev = crtc->dev; + unsigned int pipe = drm_crtc_index(crtc); + struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + + drm_WARN_ON_ONCE(dev, !crtc->funcs->get_vblank_timestamp); + drm_WARN_ON_ONCE(dev, vblank->inmodeset); + drm_WARN_ON_ONCE(dev, !vblank->config.disable_immediate); - drm_vblank_restore(crtc->dev, drm_crtc_index(crtc)); + drm_vblank_restore(dev, pipe); } EXPORT_SYMBOL(drm_crtc_vblank_restore); @@ -1754,7 +1790,7 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, /* If the counter is currently enabled and accurate, short-circuit * queries to return the cached timestamp of the last vblank. */ - if (dev->vblank_disable_immediate && + if (vblank->config.disable_immediate && drm_wait_vblank_is_query(vblwait) && READ_ONCE(vblank->enabled)) { drm_wait_vblank_reply(dev, pipe, &vblwait->reply); @@ -1918,8 +1954,8 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe) * been signaled. The disable has to be last (after * drm_handle_vblank_events) so that the timestamp is always accurate. 
*/ - disable_irq = (dev->vblank_disable_immediate && - drm_vblank_offdelay > 0 && + disable_irq = (vblank->config.disable_immediate && + vblank->config.offdelay_ms > 0 && !atomic_read(&vblank->refcount)); drm_handle_vblank_events(dev, pipe); @@ -1992,7 +2028,8 @@ int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, pipe = drm_crtc_index(crtc); vblank = drm_crtc_vblank_crtc(crtc); - vblank_enabled = dev->vblank_disable_immediate && READ_ONCE(vblank->enabled); + vblank_enabled = READ_ONCE(vblank->config.disable_immediate) && + READ_ONCE(vblank->enabled); if (!vblank_enabled) { ret = drm_crtc_vblank_get(crtc); diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 938832a6af153..b06aa473102b3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1001,6 +1001,9 @@ nouveau_connector_get_modes(struct drm_connector *connector) struct drm_display_mode *mode; mode = drm_mode_duplicate(dev, nv_connector->native_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); ret = 1; } diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index b8a84f26b3ef8..b5e7b919f241e 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -86,15 +86,15 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, int ret = 0; void *out_alloc; + if (!in->count) + return NULL; + /* User stride must be at least the minimum object size, otherwise it might * lack useful information. */ if (in->stride < min_stride) return ERR_PTR(-EINVAL); - if (!in->count) - return NULL; - out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); if (!out_alloc) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 42929e1471077..463bcd3cf00f3 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -458,6 +458,16 @@ struct panthor_queue { /** @seqno: Sequence number of the last initialized fence. */ atomic64_t seqno; + /** + * @last_fence: Fence of the last submitted job. + * + * We return this fence when we get an empty command stream. + * This way, we are guaranteed that all earlier jobs have completed + * when drm_sched_job::s_fence::finished without having to feed + * the CS ring buffer with a dummy job that only signals the fence. + */ + struct dma_fence *last_fence; + /** * @in_flight_jobs: List containing all in-flight jobs. * @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * panthor_kernel_bo_destroy(queue->ringbuf); panthor_kernel_bo_destroy(queue->iface.mem); + /* Release the last_fence we were holding, if any. */ + dma_fence_put(queue->fence_ctx.last_fence); + kfree(queue); } @@ -2784,9 +2797,6 @@ static void group_sync_upd_work(struct work_struct *work) spin_lock(&queue->fence_ctx.lock); list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { - if (!job->call_info.size) - continue; - if (syncobj->seqno < job->done_fence->seqno) break; @@ -2865,11 +2875,14 @@ queue_run_job(struct drm_sched_job *sched_job) static_assert(sizeof(call_instrs) % 64 == 0, "call_instrs is not aligned on a cacheline"); - /* Stream size is zero, nothing to do => return a NULL fence and let - * drm_sched signal the parent. 
+ /* Stream size is zero, nothing to do except making sure all previously + * submitted jobs are done before we signal the + * drm_sched_job::s_fence::finished fence. */ - if (!job->call_info.size) - return NULL; + if (!job->call_info.size) { + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); + return dma_fence_get(job->done_fence); + } ret = pm_runtime_resume_and_get(ptdev->base.dev); if (drm_WARN_ON(&ptdev->base, ret)) @@ -2929,6 +2942,10 @@ queue_run_job(struct drm_sched_job *sched_job) panthor_devfreq_record_busy(sched->ptdev); } + /* Update the last fence. */ + dma_fence_put(queue->fence_ctx.last_fence); + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); + done_fence = dma_fence_get(job->done_fence); out_unlock: @@ -3379,10 +3396,15 @@ panthor_job_create(struct panthor_file *pfile, goto err_put_job; } - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); - if (!job->done_fence) { - ret = -ENOMEM; - goto err_put_job; + /* Empty command streams don't need a fence, they'll pick the one from + * the previously submitted job. + */ + if (job->call_info.size) { + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); + if (!job->done_fence) { + ret = -ENOMEM; + goto err_put_job; + } } ret = drm_sched_job_init(&job->base, diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 1b2d31c4d77ca..ac77d1246b945 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -2104,7 +2104,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = radeon_get_ib_value(p, idx+1) << 8; + offset = (u64)radeon_get_ib_value(p, idx+1) << 8; if (offset != track->vgt_strmout_bo_offset[idx_value]) { DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n", offset, track->vgt_strmout_bo_offset[idx_value]); diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile index 53863621829f1..b5a6b6a6203af 100644 --- a/drivers/gpu/drm/scheduler/Makefile +++ b/drivers/gpu/drm/scheduler/Makefile @@ -20,6 +20,19 @@ # OTHER DEALINGS IN THE SOFTWARE. 
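The one-line radeon fix above is a classic integer-promotion bug; a minimal standalone sketch of why the cast must happen before the shift:

    #include <stdint.h>

    /* radeon_get_ib_value() yields a 32-bit word; shifting it left by 8
     * while it is still a u32 drops the top 8 bits before the assignment
     * to the 64-bit offset widens the result. */
    static uint64_t bo_offset_truncated(uint32_t ib_value)
    {
        return ib_value << 8;           /* bug: 32-bit shift, then widen */
    }

    static uint64_t bo_offset_correct(uint32_t ib_value)
    {
        return (uint64_t)ib_value << 8; /* fix: widen, then shift */
    }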
# # -gpu-sched-y := sched_main.o sched_fence.o sched_entity.o -obj-$(CONFIG_DRM_SCHED) += gpu-sched.o +# +# In DKMS mode the module can be renamed by passing SCHED_NAME as a parameter +# to 'make' if required +# +SCHED_NAME = gpu-sched + +$(SCHED_NAME)-y := sched_main.o sched_fence.o sched_entity.o +obj-$(CONFIG_DRM_SCHED) += $(SCHED_NAME).o + +SCHED_FULL_PATH := $(src) + + +ccflags-y := -I$(SCHED_FULL_PATH) + +include $(SCHED_FULL_PATH)/backport/Makefile diff --git a/drivers/gpu/drm/scheduler/backport/Makefile b/drivers/gpu/drm/scheduler/backport/Makefile new file mode 100644 index 0000000000000..01bf391770a05 --- /dev/null +++ b/drivers/gpu/drm/scheduler/backport/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: MIT +ccflags-y += \ + -I$(SCHED_FULL_PATH) \ + -I$(SCHED_FULL_PATH)/../amd/dkms \ + -include config/config.h \ + -include backport/backport.h diff --git a/drivers/gpu/drm/scheduler/backport/backport.h b/drivers/gpu/drm/scheduler/backport/backport.h new file mode 100644 index 0000000000000..8f980b3fc2384 --- /dev/null +++ b/drivers/gpu/drm/scheduler/backport/backport.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDSCHED_BACKPORT_H +#define AMDSCHED_BACKPORT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#endif diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index c75302ca3427c..087b47fb976b1 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -48,7 +48,7 @@ DECLARE_EVENT_CLASS(drm_sched_job, __entry->entity = entity; __entry->id = sched_job->id; __entry->fence = &sched_job->s_fence->finished; - __assign_str(name); + __amdkcl_assign_str(name, sched_job->sched->name); __entry->job_count = spsc_queue_count(&entity->job_queue); __entry->hw_job_count = atomic_read( &sched_job->sched->credit_count); @@ -94,7 +94,7 @@ TRACE_EVENT(drm_sched_job_wait_dep, ), TP_fast_assign( - __assign_str(name); + __amdkcl_assign_str(name, sched_job->sched->name); __entry->id = sched_job->id; __entry->fence = fence; __entry->ctx = fence->context; @@ -110,5 +110,5 @@ TRACE_EVENT(drm_sched_job_wait_dep, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/scheduler +#define TRACE_INCLUDE_PATH . #include <trace/define_trace.h> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 58c8161289fea..6b278e18dabea 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -192,6 +192,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) job->sched->ops->free_job(job); } +#ifdef HAVE_STRUCT_XARRAY /* Signal the scheduler finished fence when the entity in question is killed. 
*/ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct dma_fence_cb *cb) @@ -265,7 +266,7 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) } dma_fence_put(prev); } - +#endif /** * drm_sched_entity_flush - Flush a context entity * @@ -305,14 +306,75 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) /* For killed process disable any more IBs enqueue right now */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); +#ifdef HAVE_STRUCT_XARRAY if ((!last_user || last_user == current->group_leader) && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) drm_sched_entity_kill(entity); - +#else + if ((!last_user || last_user == current->group_leader) && + (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) { + spin_lock(&entity->rq_lock); + entity->stopped = true; + drm_sched_rq_remove_entity(entity->rq, entity); + spin_unlock(&entity->rq_lock); + } +#endif return ret; } EXPORT_SYMBOL(drm_sched_entity_flush); +#ifndef HAVE_STRUCT_XARRAY +/* Signal the scheduler finished fence when the entity in question is killed. */ +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + + dma_fence_put(f); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); + schedule_work(&job->work); +} + +static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) +{ + struct drm_sched_job *job; + struct dma_fence *f; + int r; + + while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) { + struct drm_sched_fence *s_fence = job->s_fence; + + /* Wait for all dependencies to avoid data corruptions */ + while ((f = job->sched->ops->prepare_job(job, entity))) { + dma_fence_wait(f, false); + dma_fence_put(f); + } + + drm_sched_fence_scheduled(s_fence, f); + dma_fence_set_error(&s_fence->finished, -ESRCH); + + /* + * When the pipe is hung by an older entity, a new entity might + * not even have a chance to submit its first job to HW + * and so entity->last_scheduled will remain NULL + */ + if (!entity->last_scheduled) { + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + continue; + } + + dma_fence_get(entity->last_scheduled); + r = dma_fence_add_callback(entity->last_scheduled, + &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); + } +} +#endif /** * drm_sched_entity_fini - Destroy a context entity * @@ -326,6 +388,36 @@ EXPORT_SYMBOL(drm_sched_entity_flush); */ void drm_sched_entity_fini(struct drm_sched_entity *entity) { +#ifndef HAVE_STRUCT_XARRAY + struct drm_gpu_scheduler *sched = NULL; + + if (entity->rq) { + sched = entity->rq->sched; + drm_sched_rq_remove_entity(entity->rq, entity); + } + + /* Consumption of existing IBs wasn't completed. Forcefully + * remove them here. + */ + if (spsc_queue_count(&entity->job_queue)) { + if (sched) { + /* + * Wait for thread to idle to make sure it isn't processing + * this entity. + */ + wait_for_completion(&entity->entity_idle); + + } + if (entity->dependency) { + dma_fence_remove_callback(entity->dependency, + &entity->cb); + dma_fence_put(entity->dependency); + entity->dependency = NULL; + } + + drm_sched_entity_kill_jobs(entity); + } +#else /* * If consumption of existing IBs wasn't completed. Forcefully remove * them here. 
Also makes sure that the scheduler won't touch this entity @@ -338,7 +430,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) dma_fence_put(entity->dependency); entity->dependency = NULL; } - +#endif dma_fence_put(rcu_dereference_check(entity->last_scheduled, true)); RCU_INIT_POINTER(entity->last_scheduled, NULL); } @@ -449,6 +541,7 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) return false; } +#ifdef HAVE_STRUCT_XARRAY static struct dma_fence * drm_sched_job_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity) @@ -470,9 +563,13 @@ drm_sched_job_dependency(struct drm_sched_job *job, return NULL; } +#endif struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { +#ifndef HAVE_STRUCT_XARRAY + struct drm_gpu_scheduler *sched = entity->rq->sched; +#endif struct drm_sched_job *sched_job; sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); @@ -480,7 +577,11 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) return NULL; while ((entity->dependency = +#ifdef HAVE_STRUCT_XARRAY drm_sched_job_dependency(sched_job, entity))) { +#else + sched->ops->prepare_job(sched_job, entity))) { +#endif trace_drm_sched_job_wait_dep(sched_job, entity->dependency); if (drm_sched_entity_add_dependency_cb(entity)) diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 0f35f009b9d37..7893cb8345a57 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -46,6 +46,7 @@ static void __exit drm_sched_fence_slab_fini(void) kmem_cache_destroy(sched_fence_slab); } +#ifdef HAVE_DMA_FENCE_OPS_SET_DEADLINE static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, struct dma_fence *fence) { @@ -59,6 +60,7 @@ static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, &s_fence->finished.flags)) dma_fence_set_deadline(fence, s_fence->deadline); } +#endif void drm_sched_fence_scheduled(struct drm_sched_fence *fence, struct dma_fence *parent) @@ -70,8 +72,11 @@ void drm_sched_fence_scheduled(struct drm_sched_fence *fence, * up. 
 	 */
 	if (!IS_ERR_OR_NULL(parent))
+#ifdef HAVE_DMA_FENCE_OPS_SET_DEADLINE
 		drm_sched_fence_set_parent(fence, parent);
-
+#else
+		fence->parent = dma_fence_get(parent);
+#endif
 	dma_fence_signal(&fence->scheduled);
 }
@@ -147,6 +152,7 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
 	dma_fence_put(&fence->scheduled);
 }
 
+#ifdef HAVE_DMA_FENCE_OPS_SET_DEADLINE
 static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
 						  ktime_t deadline)
 {
@@ -177,18 +183,25 @@ static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
 	if (parent)
 		dma_fence_set_deadline(parent, deadline);
 }
+#endif
 
 static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
 	.get_driver_name = drm_sched_fence_get_driver_name,
 	.get_timeline_name = drm_sched_fence_get_timeline_name,
+	AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING_OPTIONAL
+	AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL
 	.release = drm_sched_fence_release_scheduled,
 };
 
 static const struct dma_fence_ops drm_sched_fence_ops_finished = {
 	.get_driver_name = drm_sched_fence_get_driver_name,
 	.get_timeline_name = drm_sched_fence_get_timeline_name,
+	AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING_OPTIONAL
+	AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL
 	.release = drm_sched_fence_release_finished,
+#ifdef HAVE_DMA_FENCE_OPS_SET_DEADLINE
 	.set_deadline = drm_sched_fence_set_deadline_finished,
+#endif
 };
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 7e90c9f95611a..5adab4b3386c1 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -813,8 +813,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&job->list);
-
+#ifdef HAVE_STRUCT_XARRAY
 	xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
+#endif
 
 	return 0;
 }
@@ -850,6 +851,7 @@ void drm_sched_job_arm(struct drm_sched_job *job)
 }
 EXPORT_SYMBOL(drm_sched_job_arm);
 
+#ifdef HAVE_STRUCT_XARRAY
 /**
  * drm_sched_job_add_dependency - adds the fence as a job dependency
  * @job: scheduler job to add the dependencies to
@@ -959,6 +961,7 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
 }
 EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
 
+#ifdef HAVE_DRM_GEM_OBJECT_RESV
 /**
  * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
  *   dependencies
@@ -982,6 +985,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
 					    dma_resv_usage_rw(write));
 }
 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
+#endif
+#endif /* HAVE_STRUCT_XARRAY */
 
 /**
  * drm_sched_job_cleanup - clean up scheduler job resources
@@ -998,8 +1003,10 @@ EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
  */
 void drm_sched_job_cleanup(struct drm_sched_job *job)
 {
+#ifdef HAVE_STRUCT_XARRAY
 	struct dma_fence *fence;
 	unsigned long index;
+#endif
 
 	if (kref_read(&job->s_fence->finished.refcount)) {
 		/* drm_sched_job_arm() has been called */
@@ -1011,11 +1018,12 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
 
 	job->s_fence = NULL;
 
+#ifdef HAVE_STRUCT_XARRAY
 	xa_for_each(&job->dependencies, index, fence) {
 		dma_fence_put(fence);
 	}
 	xa_destroy(&job->dependencies);
-
+#endif
 }
 EXPORT_SYMBOL(drm_sched_job_cleanup);
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index dad298127226c..40e1d3ff14a5b 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -2,10 +2,21 @@
 #
 # Makefile for the drm device driver.
This driver provides support for the -ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ +# +# In DKMS mode the module can be renamed by passing TTM_NAME as a parameter +# to 'make' if required +# +TTM_NAME = ttm +ccflags-y += \ + -DTTM_NAME="\"$(TTM_NAME)\"" + +$(TTM_NAME)-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \ ttm_device.o ttm_sys_manager.o -ttm-$(CONFIG_AGP) += ttm_agp_backend.o +$(TTM_NAME)-$(CONFIG_AGP) += ttm_agp_backend.o -obj-$(CONFIG_DRM_TTM) += ttm.o +obj-$(CONFIG_DRM_TTM) += $(TTM_NAME).o obj-$(CONFIG_DRM_TTM_KUNIT_TEST) += tests/ + +TTM_FULL_PATH := $(src) +include $(TTM_FULL_PATH)/backport/Makefile diff --git a/drivers/gpu/drm/ttm/backport/Makefile b/drivers/gpu/drm/ttm/backport/Makefile new file mode 100644 index 0000000000000..0ccfec344b665 --- /dev/null +++ b/drivers/gpu/drm/ttm/backport/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: MIT +ccflags-y += \ + -I$(TTM_FULL_PATH) \ + -I$(TTM_FULL_PATH)/../amd/dkms \ + -include config/config.h \ + -include backport/backport.h + +ccflags-y += -DHAVE_CONFIG_H diff --git a/drivers/gpu/drm/ttm/backport/backport.h b/drivers/gpu/drm/ttm/backport/backport.h new file mode 100644 index 0000000000000..f9e3e75824090 --- /dev/null +++ b/drivers/gpu/drm/ttm/backport/backport.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDTTM_BACKPORT_H +#define AMDTTM_BACKPORT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c index d27691f2e4518..e3121e5d44100 100644 --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c @@ -30,6 +30,9 @@ * Keith Packard. 
*/ +#ifdef pr_fmt +#undef pr_fmt +#endif /* pr_fmt */ #define pr_fmt(fmt) "[TTM] " fmt #include diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6396dece0db15..acb8487ad4457 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -29,6 +29,9 @@ * Authors: Thomas Hellstrom */ +#ifdef pr_fmt +#undef pr_fmt +#endif /* pr_fmt */ #define pr_fmt(fmt) "[TTM] " fmt #include @@ -73,7 +76,7 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, */ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo) { - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (bo->resource) ttm_resource_move_to_lru_tail(bo->resource); @@ -97,7 +100,7 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo, struct ttm_lru_bulk_move *bulk) { - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (bo->bulk_move == bulk) return; @@ -145,7 +148,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, } } - ret = dma_resv_reserve_fences(bo->base.resv, 1); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(bo), 1); if (ret) goto out_err; @@ -187,13 +190,13 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) { int r; - if (bo->base.resv == &bo->base._resv) + if (amdkcl_ttm_resvp(bo) == &amdkcl_ttm_resv(bo)) return 0; - BUG_ON(!dma_resv_trylock(&bo->base._resv)); + BUG_ON(!dma_resv_trylock(&amdkcl_ttm_resv(bo))); - r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv); - dma_resv_unlock(&bo->base._resv); + r = dma_resv_copy_fences(&amdkcl_ttm_resv(bo), amdkcl_ttm_resvp(bo)); + dma_resv_unlock(&amdkcl_ttm_resv(bo)); if (r) return r; @@ -203,7 +206,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * the resv object while holding the lru_lock. */ spin_lock(&bo->bdev->lru_lock); - bo->base.resv = &bo->base._resv; + amdkcl_ttm_resvp(bo) = &amdkcl_ttm_resv(bo); spin_unlock(&bo->bdev->lru_lock); } @@ -212,7 +215,7 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { - struct dma_resv *resv = &bo->base._resv; + struct dma_resv *resv = &amdkcl_ttm_resv(bo); struct dma_resv_iter cursor; struct dma_fence *fence; @@ -242,7 +245,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool interruptible, bool no_wait_gpu, bool unlock_resv) { - struct dma_resv *resv = &bo->base._resv; + struct dma_resv *resv = &amdkcl_ttm_resv(bo); int ret; if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) @@ -254,7 +257,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, long lret; if (unlock_resv) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); spin_unlock(&bo->bdev->lru_lock); lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, @@ -267,7 +270,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, return -EBUSY; spin_lock(&bo->bdev->lru_lock); - if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { + if (unlock_resv && !dma_resv_trylock(amdkcl_ttm_resvp(bo))) { /* * We raced, and lost, someone else holds the reservation now, * and is probably busy in ttm_bo_cleanup_memtype_use. 
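Every bo->base.resv / bo->base._resv access in these TTM hunks is rewritten through amdkcl_ttm_resvp() and amdkcl_ttm_resv(), which this series never defines; the real definitions come from the amd/dkms headers pulled in via backport.h. A plausible sketch of their shape, where the field names in the #else arm are assumptions modeled on pre-GEM-resv TTM:

	/* Hypothetical compat accessors (illustrative only): */
	#ifdef HAVE_DRM_GEM_OBJECT_RESV
	#define amdkcl_ttm_resvp(bo)	((bo)->base.resv)	/* pointer; may alias a shared resv */
	#define amdkcl_ttm_resv(bo)	((bo)->base._resv)	/* embedded private resv */
	#else
	#define amdkcl_ttm_resvp(bo)	((bo)->resv)		/* assumed pre-5.x TTM fields */
	#define amdkcl_ttm_resv(bo)	((bo)->ttm_resv)
	#endif

Both expand to lvalues, which matches the "amdkcl_ttm_resvp(bo) = resv" assignments in ttm_bo_init_reserved() below.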
@@ -284,7 +287,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret) { if (unlock_resv) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); spin_unlock(&bo->bdev->lru_lock); return ret; } @@ -293,7 +296,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, ttm_bo_cleanup_memtype_use(bo); if (unlock_resv) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); return 0; } @@ -308,11 +311,11 @@ static void ttm_bo_delayed_delete(struct work_struct *work) bo = container_of(work, typeof(*bo), delayed_delete); - dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, + dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); - dma_resv_lock(bo->base.resv, NULL); + dma_resv_lock(amdkcl_ttm_resvp(bo), NULL); ttm_bo_cleanup_memtype_use(bo); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); ttm_bo_put(bo); } @@ -332,7 +335,7 @@ static void ttm_bo_release(struct kref *kref) /* Last resort, if we fail to allocate memory for the * fences block for the BO to become idle */ - dma_resv_wait_timeout(bo->base.resv, + dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, false, 30 * HZ); } @@ -343,10 +346,11 @@ static void ttm_bo_release(struct kref *kref) drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); ttm_mem_io_free(bdev, bo->resource); - if (!dma_resv_test_signaled(bo->base.resv, + if (!dma_resv_test_signaled(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP) || (want_init_on_free() && (bo->ttm != NULL)) || - !dma_resv_trylock(bo->base.resv)) { + bo->type == ttm_bo_type_sg || + !dma_resv_trylock(amdkcl_ttm_resvp(bo))) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); bo->deleted = true; @@ -381,7 +385,7 @@ static void ttm_bo_release(struct kref *kref) } ttm_bo_cleanup_memtype_use(bo); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } atomic_dec(&ttm_glob.bo_count); @@ -436,7 +440,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, memset(&hop, 0, sizeof(hop)); - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); placement.num_placement = 0; bdev->funcs->evict_flags(bo, &placement); @@ -494,7 +498,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, struct ttm_resource *res = bo->resource; struct ttm_device *bdev = bo->bdev; - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (bo->resource->mem_type == TTM_PL_SYSTEM) return true; @@ -529,15 +533,15 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, return false; } - if (bo->base.resv == ctx->resv) { - dma_resv_assert_held(bo->base.resv); + if (amdkcl_ttm_resvp(bo) == ctx->resv) { + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (ctx->allow_res_evict) ret = true; *locked = false; if (busy) *busy = false; } else { - ret = dma_resv_trylock(bo->base.resv); + ret = dma_resv_trylock(amdkcl_ttm_resvp(bo)); *locked = ret; if (busy) *busy = !ret; @@ -547,7 +551,7 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, !bo->bdev->funcs->eviction_valuable(bo, place))) { ret = false; if (*locked) { - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); *locked = false; } } @@ -574,10 +578,10 @@ static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo, return -EBUSY; if (ctx->interruptible) - r = dma_resv_lock_interruptible(busy_bo->base.resv, + r = 
dma_resv_lock_interruptible(amdkcl_ttm_resvp(busy_bo), ticket); else - r = dma_resv_lock(busy_bo->base.resv, ticket); + r = dma_resv_lock(amdkcl_ttm_resvp(busy_bo), ticket); /* * TODO: It would be better to keep the BO locked until allocation is at @@ -585,7 +589,7 @@ static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo, * of TTM. */ if (!r) - dma_resv_unlock(busy_bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(busy_bo)); return r == -EDEADLK ? -EBUSY : r; } @@ -609,7 +613,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place, &locked, &busy)) { if (busy && !busy_bo && ticket != - dma_resv_locking_ctx(res->bo->base.resv)) + dma_resv_locking_ctx(amdkcl_ttm_resvp(res->bo))) busy_bo = res->bo; continue; } @@ -619,7 +623,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, break; } if (locked) - dma_resv_unlock(res->bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(res->bo)); } if (!bo) { @@ -660,7 +664,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, */ void ttm_bo_pin(struct ttm_buffer_object *bo) { - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); WARN_ON_ONCE(!kref_read(&bo->kref)); spin_lock(&bo->bdev->lru_lock); if (bo->resource) @@ -678,7 +682,7 @@ EXPORT_SYMBOL(ttm_bo_pin); */ void ttm_bo_unpin(struct ttm_buffer_object *bo) { - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); WARN_ON_ONCE(!kref_read(&bo->kref)); if (WARN_ON_ONCE(!bo->pin_count)) return; @@ -715,9 +719,9 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; } - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(amdkcl_ttm_resvp(bo), fence, DMA_RESV_USAGE_KERNEL); - ret = dma_resv_reserve_fences(bo->base.resv, 1); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(bo), 1); dma_fence_put(fence); return ret; } @@ -751,8 +755,8 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, struct ww_acquire_ctx *ticket; int i, ret; - ticket = dma_resv_locking_ctx(bo->base.resv); - ret = dma_resv_reserve_fences(bo->base.resv, 1); + ticket = dma_resv_locking_ctx(amdkcl_ttm_resvp(bo)); + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(bo), 1); if (unlikely(ret)) return ret; @@ -852,7 +856,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, bool force_space; int ret; - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); /* * Remove the backing store if no placement is given. @@ -970,9 +974,14 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo, bo->sg = sg; bo->bulk_move = NULL; if (resv) - bo->base.resv = resv; + amdkcl_ttm_resvp(bo) = resv; else - bo->base.resv = &bo->base._resv; + amdkcl_ttm_resvp(bo) = &amdkcl_ttm_resv(bo); + +#ifndef HAVE_DRM_GEM_OBJECT_RESV + dma_resv_init(&amdkcl_ttm_resv(bo)); +#endif + atomic_inc(&ttm_glob.bo_count); /* @@ -990,7 +999,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo, * since otherwise lockdep will be angered in radeon. 
*/ if (!resv) - WARN_ON(!dma_resv_trylock(bo->base.resv)); + WARN_ON(!dma_resv_trylock(amdkcl_ttm_resvp(bo))); else dma_resv_assert_held(resv); @@ -1002,7 +1011,7 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo, err_unlock: if (!resv) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); err_put: ttm_bo_put(bo); @@ -1100,14 +1109,14 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) long ret; if (ctx->no_wait_gpu) { - if (dma_resv_test_signaled(bo->base.resv, + if (dma_resv_test_signaled(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP)) return 0; else return -EBUSY; } - ret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + ret = dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, ctx->interruptible, 15 * HZ); if (unlikely(ret < 0)) return ret; @@ -1140,7 +1149,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED || !ttm_bo_get_unless_zero(bo)) { if (locked) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); return -EBUSY; } @@ -1199,7 +1208,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, * already swapped buffer. */ if (locked) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); ttm_bo_put(bo); return ret == -EBUSY ? -ENOSPC : ret; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 0b3f4267130c4..c6524f4574cf2 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -113,7 +113,26 @@ void ttm_move_memcpy(bool clear, dst_ops->map_local(dst_iter, &dst_map, i); src_ops->map_local(src_iter, &src_map, i); +#ifdef HAVE_DRM_MEMCPY_FROM_WC_IOSYS_MAP_ARG drm_memcpy_from_wc(&dst_map, &src_map, PAGE_SIZE); +#else + if (!src_map.is_iomem && !dst_map.is_iomem) { + memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE); + } else if (!src_map.is_iomem) { + iosys_map_memcpy_to(&dst_map, 0, src_map.vaddr, + PAGE_SIZE); + } else if (!dst_map.is_iomem) { + memcpy_fromio(dst_map.vaddr, src_map.vaddr_iomem, + PAGE_SIZE); + } else { + int j; + u32 __iomem *src = src_map.vaddr_iomem; + u32 __iomem *dst = dst_map.vaddr_iomem; + + for (j = 0; j < (PAGE_SIZE / sizeof(u32)); ++j) + iowrite32(ioread32(src++), dst++); + } +#endif if (src_ops->unmap_local) src_ops->unmap_local(src_iter, &src_map); @@ -203,7 +222,7 @@ static void ttm_transfered_destroy(struct ttm_buffer_object *bo) struct ttm_transfer_obj *fbo; fbo = container_of(bo, struct ttm_transfer_obj, base); - dma_resv_fini(&fbo->base.base._resv); + dma_resv_fini(&amdkcl_ttm_resv(&fbo->base)); ttm_bo_put(fbo->bo); kfree(fbo); } @@ -247,11 +266,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.destroy = &ttm_transfered_destroy; fbo->base.pin_count = 0; if (bo->type != ttm_bo_type_sg) - fbo->base.base.resv = &fbo->base.base._resv; + amdkcl_ttm_resvp(&fbo->base) = &amdkcl_ttm_resv(&fbo->base); - dma_resv_init(&fbo->base.base._resv); + dma_resv_init(&amdkcl_ttm_resv(&fbo->base)); fbo->base.base.dev = NULL; - ret = dma_resv_trylock(&fbo->base.base._resv); + ret = dma_resv_trylock(&amdkcl_ttm_resv(&fbo->base)); WARN_ON(!ret); if (fbo->base.resource) { @@ -262,7 +281,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + ret = dma_resv_reserve_fences(&amdkcl_ttm_resv(&fbo->base), 1); if (ret) 
{ kfree(fbo); return ret; @@ -471,7 +490,7 @@ int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) struct ttm_resource *mem = bo->resource; int ret; - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); ret = ttm_mem_io_reserve(bo->bdev, mem); if (ret) @@ -539,7 +558,7 @@ void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map) { struct ttm_resource *mem = bo->resource; - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (iosys_map_is_null(map)) return; @@ -559,7 +578,7 @@ static int ttm_bo_wait_free_node(struct ttm_buffer_object *bo, { long ret; - ret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + ret = dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, false, 15 * HZ); if (ret == 0) return -EBUSY; @@ -591,7 +610,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, if (ret) return ret; - dma_resv_add_fence(&ghost_obj->base._resv, fence, + dma_resv_add_fence(&amdkcl_ttm_resv(ghost_obj), fence, DMA_RESV_USAGE_KERNEL); /** @@ -605,7 +624,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, else bo->ttm = NULL; - dma_resv_unlock(&ghost_obj->base._resv); + dma_resv_unlock(&amdkcl_ttm_resv(ghost_obj)); ttm_bo_put(ghost_obj); return 0; } @@ -659,7 +678,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0; - dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); + dma_resv_add_fence(amdkcl_ttm_resvp(bo), fence, DMA_RESV_USAGE_KERNEL); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline) @@ -718,7 +737,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) int ret; /* If already idle, no need for ghost object dance. */ - if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP)) { + if (dma_resv_test_signaled(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP)) { if (!bo->ttm) { /* See comment below about clearing. */ ret = ttm_tt_create(bo, true); @@ -752,14 +771,14 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) if (ret) goto error_destroy_tt; - ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv); + ret = dma_resv_copy_fences(&amdkcl_ttm_resv(ghost), amdkcl_ttm_resvp(bo)); /* Last resort, wait for the BO to be idle when we are OOM */ if (ret) { - dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); } - dma_resv_unlock(&ghost->base._resv); + dma_resv_unlock(&amdkcl_ttm_resv(ghost)); ttm_bo_put(ghost); bo->ttm = ttm; return 0; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 4212b8c91dd42..edfbf49e55107 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -29,6 +29,9 @@ * Authors: Thomas Hellstrom */ +#ifdef pr_fmt +#undef pr_fmt +#endif /* pr_fmt */ #define pr_fmt(fmt) "[TTM] " fmt #include @@ -39,14 +42,15 @@ #include static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, - struct vm_fault *vmf) + struct vm_fault *vmf, + struct vm_area_struct *vma) { long err = 0; /* * Quick non-stalling check for idle. 
*/ - if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL)) + if (dma_resv_test_signaled(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_KERNEL)) return 0; /* @@ -59,11 +63,11 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, return VM_FAULT_RETRY; ttm_bo_get(bo); - mmap_read_unlock(vmf->vma->vm_mm); - (void)dma_resv_wait_timeout(bo->base.resv, + mmap_read_unlock(vma->vm_mm); + (void)dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_KERNEL, true, MAX_SCHEDULE_TIMEOUT); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); ttm_bo_put(bo); return VM_FAULT_RETRY; } @@ -71,7 +75,7 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, /* * Ordinary wait. */ - err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true, + err = dma_resv_wait_timeout(amdkcl_ttm_resvp(bo), DMA_RESV_USAGE_KERNEL, true, MAX_SCHEDULE_TIMEOUT); if (unlikely(err < 0)) { return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : @@ -114,15 +118,16 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. */ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, - struct vm_fault *vmf) + struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; /* * Work around locking order reversal in fault / nopfn * between mmap_lock and bo_reserve: Perform a trylock operation * for reserve, and if it fails, retry the fault after waiting * for the buffer to become unreserved. */ - if (unlikely(!dma_resv_trylock(bo->base.resv))) { + if (unlikely(!dma_resv_trylock(amdkcl_ttm_resvp(bo)))) { /* * If the fault allows retry and this is the first * fault attempt, we try to release the mmap_lock @@ -131,17 +136,17 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, if (fault_flag_allow_retry_first(vmf->flags)) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { ttm_bo_get(bo); - mmap_read_unlock(vmf->vma->vm_mm); - if (!dma_resv_lock_interruptible(bo->base.resv, + mmap_read_unlock(vma->vm_mm); + if (!dma_resv_lock_interruptible(amdkcl_ttm_resvp(bo), NULL)) - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); ttm_bo_put(bo); } return VM_FAULT_RETRY; } - if (dma_resv_lock_interruptible(bo->base.resv, NULL)) + if (dma_resv_lock_interruptible(amdkcl_ttm_resvp(bo), NULL)) return VM_FAULT_NOPAGE; } @@ -151,7 +156,7 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, */ if (bo->ttm && (bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { if (!(bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) { - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); return VM_FAULT_SIGBUS; } } @@ -193,13 +198,17 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, int err; pgoff_t i; vm_fault_t ret = VM_FAULT_NOPAGE; +#ifndef HAVE_VM_FAULT_ADDRESS_VMA + unsigned long address = (unsigned long)vmf->virtual_address; +#else unsigned long address = vmf->address; +#endif /* * Wait for buffer data in transit, due to a pipelined * move. 
*/ - ret = ttm_bo_vm_fault_idle(bo, vmf); + ret = ttm_bo_vm_fault_idle(bo, vmf, vma); if (unlikely(ret != 0)) return ret; @@ -234,8 +243,10 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, return VM_FAULT_SIGBUS; } } else { +#ifdef pgprot_decrypted /* Iomem should not be marked encrypted */ prot = pgprot_decrypted(prot); +#endif } /* @@ -304,8 +315,10 @@ vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) return VM_FAULT_OOM; /* Set the page to be freed using drmm release action */ +#ifdef drmm_add_action_or_reset if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) return VM_FAULT_OOM; +#endif pfn = page_to_pfn(page); @@ -318,9 +331,14 @@ vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) } EXPORT_SYMBOL(ttm_bo_vm_dummy_page); +#ifndef HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG +vm_fault_t ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#else vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; +#endif pgprot_t prot; struct ttm_buffer_object *bo = vma->vm_private_data; struct drm_device *ddev = bo->base.dev; @@ -332,16 +350,18 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return ret; prot = vma->vm_page_prot; + if (drm_dev_enter(ddev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); drm_dev_exit(idx); } else { ret = ttm_bo_vm_dummy_page(vmf, prot); } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); return ret; } @@ -448,6 +468,83 @@ static const struct vm_operations_struct ttm_bo_vm_ops = { .access = ttm_bo_vm_access, }; +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK +static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_device *bdev, + unsigned long offset, + unsigned long pages) +{ + struct drm_vma_offset_node *node; + struct ttm_buffer_object *bo = NULL; + + drm_vma_offset_lock_lookup(bdev->vma_manager); + + node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, pages); + if (likely(node)) { + bo = container_of(node, struct ttm_buffer_object, + base.vma_node); + bo = ttm_bo_get_unless_zero(bo); + } + + drm_vma_offset_unlock_lookup(bdev->vma_manager); + + if (!bo) + pr_err("Could not find buffer object to map\n"); + + return bo; +} + +static void ttm_bo_mmap_vma_setup(struct ttm_buffer_object *bo, struct vm_area_struct *vma) +{ + vma->vm_ops = &ttm_bo_vm_ops; + + /* + * Note: We're transferring the bo reference to + * vma->vm_private_data here. + */ + + vma->vm_private_data = bo; + + /* + * We'd like to use VM_PFNMAP on shared mappings, where + * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, + * but for some reason VM_PFNMAP + x86 PAT + write-combine is very + * bad for performance. Until that has been sorted out, use + * VM_MIXEDMAP on all mappings. 
See freedesktop.org bug #75719
+	 */
+	vma->vm_flags |= VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+}
+
+int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		struct ttm_device *bdev)
+{
+	struct ttm_buffer_object *bo;
+	int ret;
+
+	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
+		return -EINVAL;
+
+	bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
+	if (unlikely(!bo))
+		return -EINVAL;
+
+	if (unlikely(!bo->bdev->funcs->verify_access)) {
+		ret = -EPERM;
+		goto out_unref;
+	}
+	ret = bo->bdev->funcs->verify_access(bo, filp);
+	if (unlikely(ret != 0))
+		goto out_unref;
+
+	ttm_bo_mmap_vma_setup(bo, vma);
+	return 0;
+out_unref:
+	ttm_bo_put(bo);
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_mmap);
+#endif /* HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK */
+
 /**
  * ttm_bo_mmap_obj - mmap memory backed by a ttm buffer object.
  *
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 434cf0258000e..c23005e14555d 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -25,8 +25,6 @@
  * Authors: Christian König
  */
 
-#define pr_fmt(fmt) "[TTM DEVICE] " fmt
-
 #include 
 #include 
 
@@ -37,6 +35,11 @@
 
 #include "ttm_module.h"
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "[TTM DEVICE] " fmt
+
 /*
  * ttm_global_mutex - protecting the global state
  */
@@ -56,7 +59,13 @@ static void ttm_global_release(void)
 		goto out;
 
 	ttm_pool_mgr_fini();
-	debugfs_remove(ttm_debugfs_root);
+
+	/*
+	 * Use debugfs_remove_recursive() rather than debugfs_remove() in the
+	 * DKMS build: on older kernels debugfs_remove() cannot delete the
+	 * non-empty ttm/ directory.
+	 */
+	debugfs_remove_recursive(ttm_debugfs_root);
 
 	__free_page(glob->dummy_read_page);
 	memset(glob, 0, sizeof(*glob));
@@ -68,6 +77,9 @@ static int ttm_global_init(void)
 {
 	struct ttm_global *glob = &ttm_glob;
 	unsigned long num_pages, num_dma32;
+#if IS_ENABLED(CONFIG_X86)
+	struct cpuinfo_x86 *c = &cpu_data(0);
+#endif
 	struct sysinfo si;
 	int ret = 0;
 
@@ -77,7 +89,7 @@ static int ttm_global_init(void)
 
 	si_meminfo(&si);
 
-	ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
+	ttm_debugfs_root = debugfs_create_dir(TTM_NAME, NULL);
 	if (IS_ERR(ttm_debugfs_root)) {
 		ttm_debugfs_root = NULL;
 	}
@@ -86,7 +98,17 @@
 	 * system memory.
 	 */
 	num_pages = ((u64)si.totalram * si.mem_unit) >> PAGE_SHIFT;
+#if IS_ENABLED(CONFIG_X86)
+	/* For a GFX 9.4.3 APU, set the memory limit to 3/4 of
+	 * system memory.
+	 */
+	if (c->x86 == 0x19 && c->x86_model == 0x90)
+		num_pages = (num_pages * 3) / 4;
+	else
+		num_pages /= 2;
+#else
 	num_pages /= 2;
+#endif
 
 	/* But for DMA32 we limit ourself to only use 2GiB maximum.
*/ num_dma32 = (u64)(si.totalram - si.totalhigh) * si.mem_unit diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index f1c60fa80c2d1..b7b085f3dd360 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -35,7 +35,7 @@ static void ttm_eu_backoff_reservation_reverse(struct list_head *list, list_for_each_entry_continue_reverse(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } } @@ -51,7 +51,7 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, struct ttm_buffer_object *bo = entry->bo; ttm_bo_move_to_lru_tail_unlocked(bo); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } if (ticket) @@ -99,7 +99,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, num_fences = max(entry->num_shared, 1u); if (!ret) { - ret = dma_resv_reserve_fences(bo->base.resv, + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(bo), num_fences); if (!ret) continue; @@ -116,7 +116,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, } if (!ret) - ret = dma_resv_reserve_fences(bo->base.resv, + ret = dma_resv_reserve_fences(amdkcl_ttm_resvp(bo), num_fences); if (unlikely(ret != 0)) { @@ -150,10 +150,10 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ? + dma_resv_add_fence(amdkcl_ttm_resvp(bo), fence, entry->num_shared ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); ttm_bo_move_to_lru_tail_unlocked(bo); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } if (ticket) ww_acquire_fini(ticket); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index b3fffe7b5062a..7e8366ece9d6d 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -31,7 +31,9 @@ */ #include #include +#ifdef HAVE_LINUX_PGTABLE_H #include +#endif #include #include #include diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 6e1fd6985ffcb..7af5ad8e1b7af 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -73,7 +73,12 @@ static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS]; static spinlock_t shrinker_lock; static struct list_head shrinker_list; -static struct shrinker *mm_shrinker; +static struct shrinker +#ifdef HAVE_SHRINKER_REGISTER +*mm_shrinker; +#else +mm_shrinker; +#endif static DECLARE_RWSEM(pool_shrink_rwsem); /* Allocate pages of size 1 << order with the given gfp_flags */ @@ -91,7 +96,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, */ if (order) gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | - __GFP_KSWAPD_RECLAIM; + __GFP_THISNODE; if (!pool->use_dma_alloc) { p = alloc_pages_node(pool->nid, gfp_flags, order); @@ -107,7 +112,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, if (order) attr |= DMA_ATTR_NO_WARN; - vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE, + vaddr = kcl_dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE, &dma->addr, gfp_flags, attr); if (!vaddr) goto error_free; @@ -155,7 +160,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, dma = (void *)p->private; vaddr = (void *)(dma->vaddr & PAGE_MASK); - dma_free_attrs(pool->dev, (1UL << order) * 
PAGE_SIZE, vaddr, dma->addr, + kcl_dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr, attr); kfree(dma); } @@ -767,8 +772,20 @@ static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) struct shrink_control sc = { .gfp_mask = GFP_NOFS }; fs_reclaim_acquire(GFP_KERNEL); - seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(mm_shrinker, &sc), - ttm_pool_shrinker_scan(mm_shrinker, &sc)); + seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count( +#ifdef HAVE_SHRINKER_REGISTER + mm_shrinker, +#else + &mm_shrinker, +#endif + &sc), + ttm_pool_shrinker_scan( +#ifdef HAVE_SHRINKER_REGISTER + mm_shrinker +#else + &mm_shrinker +#endif + , &sc)); fs_reclaim_release(GFP_KERNEL); return 0; @@ -812,6 +829,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) &ttm_pool_debugfs_shrink_fops); #endif +#ifdef HAVE_SHRINKER_REGISTER mm_shrinker = shrinker_alloc(0, "drm-ttm_pool"); if (!mm_shrinker) return -ENOMEM; @@ -821,8 +839,14 @@ int ttm_pool_mgr_init(unsigned long num_pages) mm_shrinker->seeks = 1; shrinker_register(mm_shrinker); - return 0; +#else + mm_shrinker.count_objects = ttm_pool_shrinker_count; + mm_shrinker.scan_objects = ttm_pool_shrinker_scan; + mm_shrinker.seeks = 1; + + return kcl_register_shrinker(&mm_shrinker, "drm-ttm_pool"); +#endif } /** @@ -842,6 +866,10 @@ void ttm_pool_mgr_fini(void) ttm_pool_type_fini(&global_dma32_uncached[i]); } +#ifdef HAVE_SHRINKER_REGISTER shrinker_free(mm_shrinker); +#else + unregister_shrinker(&mm_shrinker); +#endif WARN_ON(!list_empty(&shrinker_list)); } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 4a66b851b67da..996324bbd5b12 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -66,8 +67,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk) continue; lockdep_assert_held(&pos->first->bo->bdev->lru_lock); - dma_resv_assert_held(pos->first->bo->base.resv); - dma_resv_assert_held(pos->last->bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(pos->first->bo)); + dma_resv_assert_held(amdkcl_ttm_resvp(pos->last->bo)); man = ttm_manager_type(pos->first->bo->bdev, i); list_bulk_move_tail(&man->lru[j], &pos->first->lru, @@ -106,7 +107,7 @@ static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, pos->first = res; pos->last = res; } else { - WARN_ON(pos->first->bo->base.resv != res->bo->base.resv); + WARN_ON(amdkcl_ttm_resvp(pos->first->bo) != amdkcl_ttm_resvp(res->bo)); ttm_lru_bulk_move_pos_tail(pos, res); } } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 4b51b90231267..107b7b546c347 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -29,6 +29,9 @@ * Authors: Thomas Hellstrom */ +#ifdef pr_fmt +#undef pr_fmt +#endif /* pr_fmt */ #define pr_fmt(fmt) "[TTM] " fmt #include @@ -36,6 +39,8 @@ #include #include #include +#include +#include #include #include #include @@ -67,7 +72,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) struct drm_device *ddev = bo->base.dev; uint32_t page_flags = 0; - dma_resv_assert_held(bo->base.resv); + dma_resv_assert_held(amdkcl_ttm_resvp(bo)); if (bo->ttm) return 0; @@ -202,6 +207,7 @@ int ttm_sg_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo, ret = ttm_sg_tt_alloc_page_directory(ttm); else ret = ttm_dma_tt_alloc_page_directory(ttm); + if (ret) { pr_err("Failed allocating page table\n"); return -ENOMEM; diff --git a/drivers/i2c/Kconfig 
b/drivers/i2c/Kconfig index 9388823bb0bb9..44710267d6699 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -135,7 +135,7 @@ config I2C_SLAVE_EEPROM Documentation/i2c/slave-eeprom-backend.rst for further details. config I2C_SLAVE_TESTUNIT - tristate "I2C eeprom testunit driver" + tristate "I2C testunit driver" help This backend can be used to trigger test cases for I2C bus masters which require a remote device with certain capabilities, e.g. diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index a12525b3186bc..f448505d54682 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -32,7 +31,6 @@ struct i2c_pnx_mif { int ret; /* Return value */ int mode; /* Interface mode */ struct completion complete; /* I/O completion */ - struct timer_list timer; /* Timeout */ u8 * buf; /* Data buffer */ int len; /* Length of data buffer */ int order; /* RX Bytes to order via TX */ @@ -117,24 +115,6 @@ static inline int wait_reset(struct i2c_pnx_algo_data *data) return (timeout <= 0); } -static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) -{ - struct timer_list *timer = &alg_data->mif.timer; - unsigned long expires = msecs_to_jiffies(alg_data->timeout); - - if (expires <= 1) - expires = 2; - - del_timer_sync(timer); - - dev_dbg(&alg_data->adapter.dev, "Timer armed at %lu plus %lu jiffies.\n", - jiffies, expires); - - timer->expires = jiffies + expires; - - add_timer(timer); -} - /** * i2c_pnx_start - start a device * @slave_addr: slave address @@ -259,8 +239,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - del_timer_sync(&alg_data->mif.timer); - dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine.\n", __func__); @@ -276,8 +254,6 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data) ~(mcntrl_afie | mcntrl_naie | mcntrl_drmie), I2C_REG_CTL(alg_data)); - /* Stop timer. */ - del_timer_sync(&alg_data->mif.timer); dev_dbg(&alg_data->adapter.dev, "%s(): Waking up xfer routine after zero-xfer.\n", __func__); @@ -364,8 +340,6 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data) mcntrl_drmie | mcntrl_daie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Kill timer. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } } @@ -400,8 +374,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) mcntrl_drmie); iowrite32(ctl, I2C_REG_CTL(alg_data)); - /* Stop timer, to prevent timeout. */ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else if (stat & mstatus_nai) { /* Slave did not acknowledge, generate a STOP */ @@ -419,8 +391,6 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) /* Our return value. */ alg_data->mif.ret = -EIO; - /* Stop timer, to prevent timeout. 
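Taken together, the i2c-pnx hunks above and below replace the driver's self-armed watchdog timer with a bounded wait on the completion. Condensed into one place, the post-patch transfer path looks like this (an illustrative reassembly of the hunks, not new code):

	/* in i2c_pnx_xfer(), per message: */
	unsigned long time_left;

	init_completion(&alg_data->mif.complete);
	/* ... write the slave address, enable interrupts, start the transfer ... */
	time_left = wait_for_completion_timeout(&alg_data->mif.complete,
						alg_data->timeout);	/* jiffies, clamped >= 2 at probe */
	if (time_left == 0)
		i2c_pnx_timeout(alg_data);	/* soft-reset the controller, mif.ret = -EIO */

This removes the race between the timer callback and the interrupt handler both completing the same completion.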
*/ - del_timer_sync(&alg_data->mif.timer); complete(&alg_data->mif.complete); } else { /* @@ -453,9 +423,8 @@ static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void i2c_pnx_timeout(struct timer_list *t) +static void i2c_pnx_timeout(struct i2c_pnx_algo_data *alg_data) { - struct i2c_pnx_algo_data *alg_data = from_timer(alg_data, t, mif.timer); u32 ctl; dev_err(&alg_data->adapter.dev, @@ -472,7 +441,6 @@ static void i2c_pnx_timeout(struct timer_list *t) iowrite32(ctl, I2C_REG_CTL(alg_data)); wait_reset(alg_data); alg_data->mif.ret = -EIO; - complete(&alg_data->mif.complete); } static inline void bus_reset_if_active(struct i2c_pnx_algo_data *alg_data) @@ -514,6 +482,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) struct i2c_msg *pmsg; int rc = 0, completed = 0, i; struct i2c_pnx_algo_data *alg_data = adap->algo_data; + unsigned long time_left; u32 stat; dev_dbg(&alg_data->adapter.dev, @@ -548,7 +517,6 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) dev_dbg(&alg_data->adapter.dev, "%s(): mode %d, %d bytes\n", __func__, alg_data->mif.mode, alg_data->mif.len); - i2c_pnx_arm_timer(alg_data); /* initialize the completion var */ init_completion(&alg_data->mif.complete); @@ -564,7 +532,10 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) break; /* Wait for completion */ - wait_for_completion(&alg_data->mif.complete); + time_left = wait_for_completion_timeout(&alg_data->mif.complete, + alg_data->timeout); + if (time_left == 0) + i2c_pnx_timeout(alg_data); if (!(rc = alg_data->mif.ret)) completed++; @@ -653,7 +624,10 @@ static int i2c_pnx_probe(struct platform_device *pdev) alg_data->adapter.algo_data = alg_data; alg_data->adapter.nr = pdev->id; - alg_data->timeout = I2C_PNX_TIMEOUT_DEFAULT; + alg_data->timeout = msecs_to_jiffies(I2C_PNX_TIMEOUT_DEFAULT); + if (alg_data->timeout <= 1) + alg_data->timeout = 2; + #ifdef CONFIG_OF alg_data->adapter.dev.of_node = of_node_get(pdev->dev.of_node); if (pdev->dev.of_node) { @@ -673,8 +647,6 @@ static int i2c_pnx_probe(struct platform_device *pdev) if (IS_ERR(alg_data->clk)) return PTR_ERR(alg_data->clk); - timer_setup(&alg_data->mif.timer, i2c_pnx_timeout, 0); - snprintf(alg_data->adapter.name, sizeof(alg_data->adapter.name), "%s", pdev->name); diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 828aa2ea0fe4c..185a5d60f1019 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -257,6 +257,14 @@ static void rcar_i2c_init(struct rcar_i2c_priv *priv) } } +static void rcar_i2c_reset_slave(struct rcar_i2c_priv *priv) +{ + rcar_i2c_write(priv, ICSIER, 0); + rcar_i2c_write(priv, ICSSR, 0); + rcar_i2c_write(priv, ICSCR, SDBS); + rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ +} + static int rcar_i2c_bus_barrier(struct rcar_i2c_priv *priv) { int ret; @@ -875,6 +883,10 @@ static int rcar_i2c_do_reset(struct rcar_i2c_priv *priv) { int ret; + /* Don't reset if a slave instance is currently running */ + if (priv->slave) + return -EISCONN; + ret = reset_control_reset(priv->rstc); if (ret) return ret; @@ -903,10 +915,10 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, /* Gen3+ needs a reset. 
That also allows RXDMA once */ if (priv->devtype >= I2C_RCAR_GEN3) { - priv->flags &= ~ID_P_NO_RXDMA; ret = rcar_i2c_do_reset(priv); if (ret) goto out; + priv->flags &= ~ID_P_NO_RXDMA; } rcar_i2c_init(priv); @@ -1033,11 +1045,8 @@ static int rcar_unreg_slave(struct i2c_client *slave) /* ensure no irq is running before clearing ptr */ disable_irq(priv->irq); - rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSSR, 0); + rcar_i2c_reset_slave(priv); enable_irq(priv->irq); - rcar_i2c_write(priv, ICSCR, SDBS); - rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ priv->slave = NULL; @@ -1152,7 +1161,9 @@ static int rcar_i2c_probe(struct platform_device *pdev) goto out_pm_disable; } - rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ + /* Bring hardware to known state */ + rcar_i2c_init(priv); + rcar_i2c_reset_slave(priv); if (priv->devtype < I2C_RCAR_GEN3) { irqflags |= IRQF_NO_THREAD; @@ -1168,6 +1179,7 @@ static int rcar_i2c_probe(struct platform_device *pdev) if (of_property_read_bool(dev->of_node, "smbus")) priv->flags |= ID_P_HOST_NOTIFY; + /* R-Car Gen3+ needs a reset before every transfer */ if (priv->devtype >= I2C_RCAR_GEN3) { priv->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(priv->rstc)) { @@ -1178,6 +1190,9 @@ static int rcar_i2c_probe(struct platform_device *pdev) ret = reset_control_status(priv->rstc); if (ret < 0) goto out_pm_put; + + /* hard reset disturbs HostNotify local target, so disable it */ + priv->flags &= ~ID_P_HOST_NOTIFY; } ret = platform_get_irq(pdev, 0); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index db0d1ac82910e..7e7b15440832b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1067,6 +1067,7 @@ EXPORT_SYMBOL(i2c_find_device_by_fwnode); static const struct i2c_device_id dummy_id[] = { { "dummy", 0 }, + { "smbus_host_notify", 0 }, { }, }; diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index ca43e98cae1b2..23a11e4e92567 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -118,6 +118,13 @@ static int i2c_slave_testunit_slave_cb(struct i2c_client *client, queue_delayed_work(system_long_wq, &tu->worker, msecs_to_jiffies(10 * tu->regs[TU_REG_DELAY])); } + + /* + * Reset reg_idx to avoid that work gets queued again in case of + * STOP after a following read message. But do not clear TU regs + * here because we still need them in the workqueue! + */ + tu->reg_idx = 0; break; case I2C_SLAVE_WRITE_REQUESTED: diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 16de57846bd9c..2e84776f4fbd4 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -315,7 +315,7 @@ int iio_trigger_attach_poll_func(struct iio_trigger *trig, * this is the case if the IIO device and the trigger device share the * same parent device. 
*/ - if (iio_validate_own_trigger(pf->indio_dev, trig)) + if (!iio_validate_own_trigger(pf->indio_dev, trig)) trig->attached_own_device = true; return ret; diff --git a/drivers/iio/light/apds9306.c b/drivers/iio/light/apds9306.c index d6627b3e6000e..66a063ea3db44 100644 --- a/drivers/iio/light/apds9306.c +++ b/drivers/iio/light/apds9306.c @@ -583,8 +583,8 @@ static int apds9306_intg_time_set(struct apds9306_data *data, int val2) return ret; intg_old = iio_gts_find_int_time_by_sel(&data->gts, intg_time_idx); - if (ret < 0) - return ret; + if (intg_old < 0) + return intg_old; if (intg_old == val2) return 0; diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index b423bec6458bb..9d51f72a9d66b 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) * The value is used by dm-thin to determine whether to pass down discards. The block layer * splits large discards on this boundary when this is set. */ - limits->max_discard_sectors = + limits->max_hw_discard_sectors = (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); /* diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c index c8a33e1e910c9..06090cc0a4760 100644 --- a/drivers/media/pci/intel/ipu6/ipu6-isys-video.c +++ b/drivers/media/pci/intel/ipu6/ipu6-isys-video.c @@ -943,7 +943,7 @@ ipu6_isys_query_stream_by_source(struct ipu6_isys *isys, int source, u8 vc) return NULL; if (source < 0) { - dev_err(&stream->isys->adev->auxdev.dev, + dev_err(&isys->adev->auxdev.dev, "query stream with invalid port number\n"); return NULL; } diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c index 8b9b77719bb16..c4aff2e2009ba 100644 --- a/drivers/media/pci/intel/ipu6/ipu6-isys.c +++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c @@ -799,7 +799,7 @@ static int isys_register_devices(struct ipu6_isys *isys) isys->v4l2_dev.mdev = &isys->media_dev; isys->v4l2_dev.ctrl_handler = NULL; - ret = v4l2_device_register(&pdev->dev, &isys->v4l2_dev); + ret = v4l2_device_register(dev, &isys->v4l2_dev); if (ret < 0) goto out_media_device_unregister; diff --git a/drivers/media/pci/intel/ivsc/Kconfig b/drivers/media/pci/intel/ivsc/Kconfig index 407a800c81bc0..a7d9607ecdc60 100644 --- a/drivers/media/pci/intel/ivsc/Kconfig +++ b/drivers/media/pci/intel/ivsc/Kconfig @@ -4,6 +4,7 @@ config INTEL_VSC tristate "Intel Visual Sensing Controller" depends on INTEL_MEI && ACPI && VIDEO_DEV + depends on IPU_BRIDGE || !IPU_BRIDGE select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_FWNODE diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 4c67e2c5a82e1..a7a2bcedb37e4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1238,6 +1238,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, struct fastrpc_phy_page pages[1]; char *name; int err; + bool scm_done = false; struct { int pgid; u32 namelen; @@ -1289,6 +1290,7 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err); goto err_map; } + scm_done = true; } } @@ -1320,10 +1322,11 @@ static int fastrpc_init_create_static_process(struct fastrpc_user *fl, goto err_invoke; kfree(args); + kfree(name); return 0; err_invoke: - if (fl->cctx->vmcount) { + if (fl->cctx->vmcount && scm_done) { u64 src_perms = 0; struct qcom_scm_vmperm 
dst_perms; u32 i; @@ -1693,16 +1696,20 @@ static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr { struct fastrpc_invoke_args args[2] = { 0 }; - /* Capability filled in userspace */ + /* + * Capability filled in userspace. This carries the information + * about the remoteproc support which is fetched from the remoteproc + * sysfs node by userspace. + */ dsp_attr_buf[0] = 0; + dsp_attr_buf_len -= 1; args[0].ptr = (u64)(uintptr_t)&dsp_attr_buf_len; args[0].length = sizeof(dsp_attr_buf_len); args[0].fd = -1; args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1]; - args[1].length = dsp_attr_buf_len; + args[1].length = dsp_attr_buf_len * sizeof(u32); args[1].fd = -1; - fl->pd = USER_PD; return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE, FASTRPC_SCALARS(0, 1, 1), args); @@ -1730,7 +1737,7 @@ static int fastrpc_get_info_from_kernel(struct fastrpc_ioctl_capability *cap, if (!dsp_attributes) return -ENOMEM; - err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES_LEN); + err = fastrpc_get_info_from_dsp(fl, dsp_attributes, FASTRPC_MAX_DSP_ATTRIBUTES); if (err == DSP_UNSUPPORTED_API) { dev_info(&cctx->rpdev->dev, "Warning: DSP capabilities not supported on domain: %d\n", domain); @@ -1783,7 +1790,7 @@ static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp) if (err) return err; - if (copy_to_user(argp, &cap.capability, sizeof(cap.capability))) + if (copy_to_user(argp, &cap, sizeof(cap))) return -EFAULT; return 0; @@ -2080,6 +2087,16 @@ static int fastrpc_req_mem_map(struct fastrpc_user *fl, char __user *argp) return err; } +static int is_attach_rejected(struct fastrpc_user *fl) +{ + /* Check if the device node is non-secure */ + if (!fl->is_secure_dev) { + dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); + return -EACCES; + } + return 0; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2092,13 +2109,19 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = fastrpc_init_attach(fl, ROOT_PD); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = fastrpc_init_attach(fl, SENSORS_PD); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = fastrpc_init_create_static_process(fl, argp); + err = is_attach_rejected(fl); + if (!err) + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c index 16695cb5e69c7..7c3d8bedf90ba 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_otpe2p.c @@ -153,7 +153,6 @@ static int pci1xxxx_eeprom_read(void *priv_t, unsigned int off, buf[byte] = readl(rb + MMAP_EEPROM_OFFSET(EEPROM_DATA_REG)); } - ret = byte; error: release_sys_lock(priv); return ret; @@ -197,7 +196,6 @@ static int pci1xxxx_eeprom_write(void *priv_t, unsigned int off, goto error; } } - ret = byte; error: release_sys_lock(priv); return ret; @@ -258,7 +256,6 @@ static int pci1xxxx_otp_read(void *priv_t, unsigned int off, buf[byte] = readl(rb + MMAP_OTP_OFFSET(OTP_RD_DATA_OFFSET)); } - ret = byte; error: release_sys_lock(priv); return 
ret; @@ -315,7 +312,6 @@ static int pci1xxxx_otp_write(void *priv_t, unsigned int off, goto error; } } - ret = byte; error: release_sys_lock(priv); return ret; diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c index 1ec65d87488a3..d02f6e881139f 100644 --- a/drivers/misc/mei/platform-vsc.c +++ b/drivers/misc/mei/platform-vsc.c @@ -28,8 +28,8 @@ #define MEI_VSC_MAX_MSG_SIZE 512 -#define MEI_VSC_POLL_DELAY_US (50 * USEC_PER_MSEC) -#define MEI_VSC_POLL_TIMEOUT_US (200 * USEC_PER_MSEC) +#define MEI_VSC_POLL_DELAY_US (100 * USEC_PER_MSEC) +#define MEI_VSC_POLL_TIMEOUT_US (400 * USEC_PER_MSEC) #define mei_dev_to_vsc_hw(dev) ((struct mei_vsc_hw *)((dev)->hw)) diff --git a/drivers/misc/mei/vsc-fw-loader.c b/drivers/misc/mei/vsc-fw-loader.c index 596a9d695dfc1..084d0205f97d6 100644 --- a/drivers/misc/mei/vsc-fw-loader.c +++ b/drivers/misc/mei/vsc-fw-loader.c @@ -204,7 +204,7 @@ struct vsc_img_frag { /** * struct vsc_fw_loader - represent vsc firmware loader - * @dev: device used to request fimware + * @dev: device used to request firmware * @tp: transport layer used with the firmware loader * @csi: CSI image * @ace: ACE image diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index e6a98dba8a735..1618cca9a7317 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -331,12 +331,12 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; } - ret = vsc_tp_dev_xfer(tp, tp->tx_buf, tp->rx_buf, len); + ret = vsc_tp_dev_xfer(tp, tp->tx_buf, ibuf ? tp->rx_buf : NULL, len); if (ret) return ret; if (ibuf) - cpu_to_be32_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, tp->rx_buf, words); return ret; } @@ -568,6 +568,19 @@ static void vsc_tp_remove(struct spi_device *spi) free_irq(spi->irq, tp); } +static void vsc_tp_shutdown(struct spi_device *spi) +{ + struct vsc_tp *tp = spi_get_drvdata(spi); + + platform_device_unregister(tp->pdev); + + mutex_destroy(&tp->mutex); + + vsc_tp_reset(tp); + + free_irq(spi->irq, tp); +} + static const struct acpi_device_id vsc_tp_acpi_ids[] = { { "INTC1009" }, /* Raptor Lake */ { "INTC1058" }, /* Tiger Lake */ @@ -580,6 +593,7 @@ MODULE_DEVICE_TABLE(acpi, vsc_tp_acpi_ids); static struct spi_driver vsc_tp_driver = { .probe = vsc_tp_probe, .remove = vsc_tp_remove, + .shutdown = vsc_tp_shutdown, .driver = { .name = "vsc-tp", .acpi_match_table = vsc_tp_acpi_ids, diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index d7427894e0bc9..c302eb380e427 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -224,6 +224,9 @@ static void davinci_fifo_data_trans(struct mmc_davinci_host *host, } p = sgm->addr; + if (n > sgm->length) + n = sgm->length; + /* NOTE: we never transfer more than rw_threshold bytes * to/from the fifo here; there's no I/O overlap. * This also assumes that access width( i.e. ACCWD) is 4 bytes diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 112584aa07723..fbf7a91bed356 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -4727,6 +4727,21 @@ int sdhci_setup_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) { host->max_adma = 65532; /* 32-bit alignment */ mmc->max_seg_size = 65535; + /* + * sdhci_adma_table_pre() expects to define 1 DMA + * descriptor per segment, so the maximum segment size + * is set accordingly. 
SDHCI allows up to 64KiB per DMA + descriptor (16-bit field), but some controllers do + not support "zero means 65536" reducing the maximum + for them to 65535. That is a problem if PAGE_SIZE is + 64KiB because the block layer does not support + max_seg_size < PAGE_SIZE, however + sdhci_adma_table_pre() has a workaround to handle + that case, and split the descriptor. Refer also + comment in sdhci_adma_table_pre(). + */ + if (mmc->max_seg_size < PAGE_SIZE) + mmc->max_seg_size = PAGE_SIZE; } else { mmc->max_seg_size = 65536; } diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d7dbbd469b892..53e16d39af4bf 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -1093,28 +1093,32 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs, unsigned int offset_in_page) { struct mtd_info *mtd = nand_to_mtd(chip); + bool ident_stage = !mtd->writesize; - /* Make sure the offset is less than the actual page size. */ - if (offset_in_page > mtd->writesize + mtd->oobsize) - return -EINVAL; + /* Bypass all checks during NAND identification */ + if (likely(!ident_stage)) { + /* Make sure the offset is less than the actual page size. */ + if (offset_in_page > mtd->writesize + mtd->oobsize) + return -EINVAL; - /* - * On small page NANDs, there's a dedicated command to access the OOB - * area, and the column address is relative to the start of the OOB - * area, not the start of the page. Asjust the address accordingly. - */ - if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize) - offset_in_page -= mtd->writesize; + /* + * On small page NANDs, there's a dedicated command to access the OOB + * area, and the column address is relative to the start of the OOB + * area, not the start of the page. Adjust the address accordingly. + */ + if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize) + offset_in_page -= mtd->writesize; - /* - * The offset in page is expressed in bytes, if the NAND bus is 16-bit - * wide, then it must be divided by 2. - */ - if (chip->options & NAND_BUSWIDTH_16) { - if (WARN_ON(offset_in_page % 2)) - return -EINVAL; + /* + * The offset in page is expressed in bytes, if the NAND bus is 16-bit + * wide, then it must be divided by 2. + */ + if (chip->options & NAND_BUSWIDTH_16) { + if (WARN_ON(offset_in_page % 2)) + return -EINVAL; - offset_in_page /= 2; + offset_in_page /= 2; + } } addrs[0] = offset_in_page; @@ -1123,7 +1127,7 @@ static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs, * Small page NANDs use 1 cycle for the columns, while large page NANDs * need 2 */ - if (mtd->writesize <= 512) + if (!ident_stage && mtd->writesize <= 512) return 1; addrs[1] = offset_in_page >> 8; @@ -1436,16 +1440,19 @@ int nand_change_read_column_op(struct nand_chip *chip, unsigned int len, bool force_8bit) { struct mtd_info *mtd = nand_to_mtd(chip); + bool ident_stage = !mtd->writesize; if (len && !buf) return -EINVAL; + if (!ident_stage) { + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; - if (offset_in_page + len > mtd->writesize + mtd->oobsize) - return -EINVAL; - /* Small page NANDs do not support column change. 
*/ + if (mtd->writesize <= 512) + return -ENOTSUPP; + } if (nand_has_exec_op(chip)) { const struct nand_interface_config *conf = @@ -2173,7 +2180,7 @@ EXPORT_SYMBOL_GPL(nand_reset_op); int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, bool force_8bit, bool check_only) { - if (!len || !buf) + if (!len || (!check_only && !buf)) return -EINVAL; if (nand_has_exec_op(chip)) { @@ -6301,6 +6308,7 @@ static const struct nand_ops rawnand_ops = { static int nand_scan_tail(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_device *base = &chip->base; struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i; @@ -6445,9 +6453,13 @@ static int nand_scan_tail(struct nand_chip *chip) if (!ecc->write_oob_raw) ecc->write_oob_raw = ecc->write_oob; - /* propagate ecc info to mtd_info */ + /* Propagate ECC info to the generic NAND and MTD layers */ mtd->ecc_strength = ecc->strength; + if (!base->ecc.ctx.conf.strength) + base->ecc.ctx.conf.strength = ecc->strength; mtd->ecc_step_size = ecc->size; + if (!base->ecc.ctx.conf.step_size) + base->ecc.ctx.conf.step_size = ecc->size; /* * Set the number of read / write steps for one page depending on ECC @@ -6455,6 +6467,8 @@ static int nand_scan_tail(struct nand_chip *chip) */ if (!ecc->steps) ecc->steps = mtd->writesize / ecc->size; + if (!base->ecc.ctx.nsteps) + base->ecc.ctx.nsteps = ecc->steps; if (ecc->steps * ecc->size != mtd->writesize) { WARN(1, "Invalid ECC parameters\n"); ret = -EINVAL; diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c index 7baaef69d70ad..55580447633be 100644 --- a/drivers/mtd/nand/raw/rockchip-nand-controller.c +++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c @@ -420,13 +420,13 @@ static int rk_nfc_setup_interface(struct nand_chip *chip, int target, u32 rate, tc2rw, trwpw, trw2c; u32 temp; - if (target < 0) - return 0; - timings = nand_get_sdr_timings(conf); if (IS_ERR(timings)) return -EOPNOTSUPP; + if (target < 0) + return 0; + if (IS_ERR(nfc->nfc_clk)) rate = clk_get_rate(nfc->ahb_clk); else diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cacd7027e352..bc80fb6397dcd 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1214,9 +1214,9 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond, __be32 target; if (newval->string) { - if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) { - netdev_err(bond->dev, "invalid ARP target %pI4 specified\n", - &target); + if (strlen(newval->string) < 1 || + !in4_pton(newval->string + 1, -1, (u8 *)&target, -1, NULL)) { + netdev_err(bond->dev, "invalid ARP target specified\n"); return ret; } if (newval->string[0] == '+') diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 7292c81fc0cdc..024169461cad0 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -125,6 +125,7 @@ static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_liste static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { .quirks = 0, + .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 02f07b870f10f..268949939636a 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1047,31 +1047,31 @@ static int lan9303_get_sset_count(struct 
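/*
 * Illustrative sketch, not part of the patch series: the nand_read_data_op()
 * change above allows a NULL buffer when check_only is set, since such calls
 * only ask whether the controller supports the operation. The relaxed
 * validation pattern in isolation:
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

static int demo_read_data_op(void *buf, unsigned int len, bool check_only)
{
	if (!len || (!check_only && !buf))
		return -EINVAL;	/* a real transfer still needs a buffer */
	if (check_only)
		return 0;	/* supported; no data is moved */
	/* ... perform the actual transfer into buf ... */
	return 0;
}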
dsa_switch *ds, int port, int sset) return ARRAY_SIZE(lan9303_mib); } -static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum) +static int lan9303_phy_read(struct dsa_switch *ds, int port, int regnum) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_read(chip, regnum); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_read(chip, phy, regnum); + return chip->ops->phy_read(chip, phy_base + port, regnum); } -static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, +static int lan9303_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) { struct lan9303 *chip = ds->priv; int phy_base = chip->phy_addr_base; - if (phy == phy_base) + if (port == 0) return lan9303_virt_phy_reg_write(chip, regnum, val); - if (phy > phy_base + 2) + if (port > 2) return -ENODEV; - return chip->ops->phy_write(chip, phy, regnum, val); + return chip->ops->phy_write(chip, phy_base + port, regnum, val); } static int lan9303_port_enable(struct dsa_switch *ds, int port, @@ -1099,7 +1099,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port) vlan_vid_del(dsa_port_to_conduit(dp), htons(ETH_P_8021Q), port); lan9303_disable_processing_port(chip, port); - lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN); + lan9303_phy_write(ds, port, MII_BMCR, BMCR_PDOWN); } static int lan9303_port_bridge_join(struct dsa_switch *ds, int port, @@ -1374,8 +1374,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = { static int lan9303_register_switch(struct lan9303 *chip) { - int base; - chip->ds = devm_kzalloc(chip->dev, sizeof(*chip->ds), GFP_KERNEL); if (!chip->ds) return -ENOMEM; @@ -1385,8 +1383,7 @@ static int lan9303_register_switch(struct lan9303 *chip) chip->ds->priv = chip; chip->ds->ops = &lan9303_switch_ops; chip->ds->phylink_mac_ops = &lan9303_phylink_mac_ops; - base = chip->phy_addr_base; - chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1 + base, base); + chip->ds->phys_mii_mask = GENMASK(LAN9303_NUM_PORTS - 1, 0); return dsa_register_switch(chip->ds); } diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index a806dadc41965..20c6529ec1350 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -1380,6 +1380,7 @@ static int bcmasp_probe(struct platform_device *pdev) dev_err(dev, "Cannot create eth interface %d\n", i); bcmasp_remove_intfs(priv); of_node_put(intf_node); + ret = -ENOMEM; goto of_put_exit; } list_add_tail(&intf->list, &priv->intfs); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e2a4e1088b7f4..9580ab83d387c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1262,7 +1262,7 @@ enum { struct bnx2x_fw_stats_req { struct stats_query_header hdr; - struct stats_query_entry query[FP_SB_MAX_E1x+ + struct stats_query_entry query[FP_SB_MAX_E2 + BNX2X_FIRST_QUEUE_QUERY_IDX]; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a6d69a45fa014..43952689bfb0c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6146,6 +6146,24 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) return max_ring; } +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp) +{ + u16 i, tbl_size, max_ring = 0; + struct 
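/*
 * Illustrative sketch, not part of the patch series: the lan9303 hunks above
 * convert the DSA PHY callbacks from raw MDIO addresses to port numbers and
 * apply phy_addr_base internally. The resulting mapping, with port 0 served
 * by the switch's virtual PHY:
 */
#include <errno.h>

#define DEMO_LAN9303_NUM_PORTS 3

/* Map a DSA port (0..2) to the MDIO address the hardware expects. */
static int demo_port_to_mdio_addr(int phy_addr_base, int port)
{
	if (port < 0 || port >= DEMO_LAN9303_NUM_PORTS)
		return -ENODEV;
	return phy_addr_base + port;	/* port 0 == virtual PHY address */
}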
bnxt_rss_ctx *rss_ctx; + + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + return 0; + + tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + + list_for_each_entry(rss_ctx, &bp->rss_ctx_list, list) { + for (i = 0; i < tbl_size; i++) + max_ring = max(max_ring, rss_ctx->rss_indir_tbl[i]); + } + + return max_ring; +} + int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { @@ -12669,7 +12687,11 @@ bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) if (!BNXT_NEW_RM(bp)) return true; - if (hwr.vnic == bp->hw_resc.resv_vnics && + /* Do not reduce VNIC and RSS ctx reservations. There is a FW + * issue that will mess up the default VNIC if we reduce the + * reservations. + */ + if (hwr.vnic <= bp->hw_resc.resv_vnics && hwr.rss_ctx <= bp->hw_resc.resv_rsscos_ctxs) return true; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9cf0acfa04e57..6b10a09ee1af7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2776,6 +2776,7 @@ int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, void bnxt_fill_ipv6_mask(__be32 mask[4]); int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx); +u16 bnxt_get_max_rss_ctx_ring(struct bnxt *bp); int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings); int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic); int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8763f8a01457f..79c09c1cdf936 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -961,6 +961,12 @@ static int bnxt_set_channels(struct net_device *dev, return rc; } + if (req_rx_rings < bp->rx_nr_rings && + req_rx_rings <= bnxt_get_max_rss_ctx_ring(bp)) { + netdev_warn(dev, "Can't deactivate rings used by RSS contexts\n"); + return -EINVAL; + } + if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && netif_is_rxfh_configured(dev)) { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 2e98a2a0bead9..ce227b56cf724 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1108,6 +1108,46 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) return 0; } +/** + * e1000e_force_smbus - Force interfaces to transition to SMBUS mode. + * @hw: pointer to the HW structure + * + * Force the MAC and the PHY to SMBUS mode. Assumes semaphore already + * acquired. + * + * Return: 0 on success, negative errno on failure. 
+ **/ +static s32 e1000e_force_smbus(struct e1000_hw *hw) +{ + u16 smb_ctrl = 0; + u32 ctrl_ext; + s32 ret_val; + + /* Switching PHY interface always returns MDI error + * so disable retry mechanism to avoid wasting time + */ + e1000e_disable_phy_retry(hw); + + /* Force SMBus mode in the PHY */ + ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &smb_ctrl); + if (ret_val) { + e1000e_enable_phy_retry(hw); + return ret_val; + } + + smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS; + e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, smb_ctrl); + + e1000e_enable_phy_retry(hw); + + /* Force SMBus mode in the MAC */ + ctrl_ext = er32(CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS; + ew32(CTRL_EXT, ctrl_ext); + + return 0; +} + /** * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure @@ -1165,6 +1205,14 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) if (ret_val) goto out; + if (hw->mac.type != e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) { + e_dbg("Failed to force SMBUS: %d\n", ret_val); + goto release; + } + } + /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ @@ -1225,27 +1273,12 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) } release: - /* Switching PHY interface always returns MDI error - * so disable retry mechanism to avoid wasting time - */ - e1000e_disable_phy_retry(hw); - - /* Force SMBus mode in PHY */ - ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); - if (ret_val) { - e1000e_enable_phy_retry(hw); - hw->phy.ops.release(hw); - goto out; + if (hw->mac.type == e1000_pch_mtp) { + ret_val = e1000e_force_smbus(hw); + if (ret_val) + e_dbg("Failed to force SMBUS over MTL system: %d\n", + ret_val); } - phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; - e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); - - e1000e_enable_phy_retry(hw); - - /* Force SMBus mode in MAC */ - mac_reg = er32(CTRL_EXT); - mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; - ew32(CTRL_EXT, mac_reg); hw->phy.ops.release(hw); out: diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index da5c59daf8ba9..3cd161c6672be 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6363,49 +6363,49 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; ew32(EXTCNF_CTRL, mac_data); - /* Enable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data |= BIT(22); - ew32(FEXTNVM7, mac_data); - /* Disable disconnected cable conditioning for Power Gating */ mac_data = er32(DPGFR); mac_data |= BIT(2); ew32(DPGFR, mac_data); - /* Don't wake from dynamic Power Gating with clock request */ - mac_data = er32(FEXTNVM12); - mac_data |= BIT(12); - ew32(FEXTNVM12, mac_data); - - /* Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Enable K1 off to enable mPHY Power Gating */ - mac_data = er32(FEXTNVM6); - mac_data |= BIT(31); - ew32(FEXTNVM6, mac_data); - - /* Enable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data |= BIT(9); - ew32(FEXTNVM8, mac_data); - /* Enable the Dynamic Clock Gating in the DMA and MAC */ mac_data = er32(CTRL_EXT); mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN; ew32(CTRL_EXT, mac_data); - - /* No MAC DPG gating SLP_S0 in modern standby - * Switch the logic of the lanphypc to use PMC 
counter - */ - mac_data = er32(FEXTNVM5); - mac_data |= BIT(7); - ew32(FEXTNVM5, mac_data); } + /* Enable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data |= BIT(22); + ew32(FEXTNVM7, mac_data); + + /* Don't wake from dynamic Power Gating with clock request */ + mac_data = er32(FEXTNVM12); + mac_data |= BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data &= ~BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Enable K1 off to enable mPHY Power Gating */ + mac_data = er32(FEXTNVM6); + mac_data |= BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Enable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data |= BIT(9); + ew32(FEXTNVM8, mac_data); + + /* No MAC DPG gating SLP_S0 in modern standby + * Switch the logic of the lanphypc to use PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data |= BIT(7); + ew32(FEXTNVM5, mac_data); + /* Disable the time synchronization clock */ mac_data = er32(FEXTNVM7); mac_data |= BIT(31); @@ -6498,33 +6498,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) } else { /* Request driver unconfigure the device from S0ix */ - /* Disable the Dynamic Power Gating in the MAC */ - mac_data = er32(FEXTNVM7); - mac_data &= 0xFFBFFFFF; - ew32(FEXTNVM7, mac_data); - - /* Disable mPHY power gating for any link and speed */ - mac_data = er32(FEXTNVM8); - mac_data &= ~BIT(9); - ew32(FEXTNVM8, mac_data); - - /* Disable K1 off */ - mac_data = er32(FEXTNVM6); - mac_data &= ~BIT(31); - ew32(FEXTNVM6, mac_data); - - /* Disable Ungate PGCB clock */ - mac_data = er32(FEXTNVM9); - mac_data |= BIT(28); - ew32(FEXTNVM9, mac_data); - - /* Cancel not waking from dynamic - * Power Gating with clock request - */ - mac_data = er32(FEXTNVM12); - mac_data &= ~BIT(12); - ew32(FEXTNVM12, mac_data); - /* Cancel disable disconnected cable conditioning * for Power Gating */ @@ -6537,13 +6510,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data &= 0xFFF7FFFF; ew32(CTRL_EXT, mac_data); - /* Revert the lanphypc logic to use the internal Gbe counter - * and not the PMC counter - */ - mac_data = er32(FEXTNVM5); - mac_data &= 0xFFFFFF7F; - ew32(FEXTNVM5, mac_data); - /* Enable the periodic inband message, * Request PCIe clock in K1 page770_17[10:9] =01b */ @@ -6581,6 +6547,40 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data &= ~BIT(31); mac_data |= BIT(0); ew32(FEXTNVM7, mac_data); + + /* Disable the Dynamic Power Gating in the MAC */ + mac_data = er32(FEXTNVM7); + mac_data &= 0xFFBFFFFF; + ew32(FEXTNVM7, mac_data); + + /* Disable mPHY power gating for any link and speed */ + mac_data = er32(FEXTNVM8); + mac_data &= ~BIT(9); + ew32(FEXTNVM8, mac_data); + + /* Disable K1 off */ + mac_data = er32(FEXTNVM6); + mac_data &= ~BIT(31); + ew32(FEXTNVM6, mac_data); + + /* Disable Ungate PGCB clock */ + mac_data = er32(FEXTNVM9); + mac_data |= BIT(28); + ew32(FEXTNVM9, mac_data); + + /* Cancel not waking from dynamic + * Power Gating with clock request + */ + mac_data = er32(FEXTNVM12); + mac_data &= ~BIT(12); + ew32(FEXTNVM12, mac_data); + + /* Revert the lanphypc logic to use the internal Gbe counter + * and not the PMC counter + */ + mac_data = er32(FEXTNVM5); + mac_data &= 0xFFFFFF7F; + ew32(FEXTNVM5, mac_data); } static int e1000e_pm_freeze(struct device *dev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h index ee86d2c53079e..55b5bb884d736 100644 --- 
a/drivers/net/ethernet/intel/i40e/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h @@ -109,10 +109,6 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) -EFBIG, /* I40E_AQ_RC_EFBIG */ }; - /* aq_rc is invalid if AQ timed out */ - if (aq_ret == -EIO) - return -EAGAIN; - if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0])))) return -ERANGE; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 284c3fad5a6e4..310513d9321b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13293,6 +13293,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, bool need_reset; int i; + /* VSI shall be deleted in a moment, block loading new programs */ + if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) + return -EINVAL; + /* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) { NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); @@ -13301,14 +13305,9 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* When turning XDP on->off/off->on we reset and rebuild the rings. */ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); - if (need_reset) i40e_prep_for_reset(pf); - /* VSI shall be deleted in a moment, just return EINVAL */ - if (test_bit(__I40E_IN_REMOVE, pf->state)) - return -EINVAL; - old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { diff --git a/drivers/net/ethernet/intel/ice/ice_hwmon.c b/drivers/net/ethernet/intel/ice/ice_hwmon.c index e4c2c1bff6c08..b7aa6812510a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_hwmon.c +++ b/drivers/net/ethernet/intel/ice/ice_hwmon.c @@ -96,7 +96,7 @@ static bool ice_is_internal_reading_supported(struct ice_pf *pf) unsigned long sensors = pf->hw.dev_caps.supported_sensors; - return _test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors); + return test_bit(ICE_SENSOR_SUPPORT_E810_INT_TEMP_BIT, &sensors); }; void ice_hwmon_init(struct ice_pf *pf) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0f17fc1181d28..fefaf52fd677a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1559,6 +1559,10 @@ void ice_ptp_extts_event(struct ice_pf *pf) u8 chan, tmr_idx; u32 hi, lo; + /* Don't process timestamp events if PTP is not ready */ + if (pf->ptp.state != ICE_PTP_READY) + return; + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* Event time is captured by one of the two matched registers * GLTSYN_EVNT_L: 32 LSB of sampled time event @@ -1584,27 +1588,33 @@ void ice_ptp_extts_event(struct ice_pf *pf) /** * ice_ptp_cfg_extts - Configure EXTTS pin and channel * @pf: Board private structure - * @ena: true to enable; false to disable * @chan: GPIO channel (0-3) - * @gpio_pin: GPIO pin - * @extts_flags: request flags from the ptp_extts_request.flags + * @config: desired EXTTS configuration. + * @store: If set to true, the values will be stored + * + * Configure an external timestamp event on the requested channel. 
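/*
 * Illustrative sketch, not part of the patch series: the i40e change above
 * moves the __I40E_IN_REMOVE test ahead of i40e_prep_for_reset(), so every
 * reason to reject a new XDP program is checked before any disruptive side
 * effect. The shape of the fix, with hypothetical names:
 */
#include <errno.h>
#include <stdbool.h>

struct demo_vsi {
	bool removing;
	bool xdp_enabled;
};

static int demo_xdp_setup(struct demo_vsi *vsi, bool attach_prog)
{
	if (attach_prog && vsi->removing)
		return -EINVAL;		/* reject first, touch nothing */

	if (vsi->xdp_enabled != attach_prog) {
		/* only now is it safe to reset and rebuild the rings */
	}
	vsi->xdp_enabled = attach_prog;
	return 0;
}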
+ * + * Return: 0 on success, -EOPNOTSUPP on unsupported flags */ -static int -ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, - unsigned int extts_flags) +static int ice_ptp_cfg_extts(struct ice_pf *pf, unsigned int chan, + struct ice_extts_channel *config, bool store) { u32 func, aux_reg, gpio_reg, irq_reg; struct ice_hw *hw = &pf->hw; u8 tmr_idx; - if (chan > (unsigned int)pf->ptp.info.n_ext_ts) - return -EINVAL; + /* Reject requests with unsupported flags */ + if (config->flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; irq_reg = rd32(hw, PFINT_OICR_ENA); - if (ena) { + if (config->ena) { /* Enable the interrupt */ irq_reg |= PFINT_OICR_TSYN_EVNT_M; aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M; @@ -1613,9 +1623,9 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, #define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE BIT(1) /* set event level to requested edge */ - if (extts_flags & PTP_FALLING_EDGE) + if (config->flags & PTP_FALLING_EDGE) aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE; - if (extts_flags & PTP_RISING_EDGE) + if (config->flags & PTP_RISING_EDGE) aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE; /* Write GPIO CTL reg. @@ -1636,11 +1646,51 @@ ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin, wr32(hw, PFINT_OICR_ENA, irq_reg); wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg); - wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg); + wr32(hw, GLGEN_GPIO_CTL(config->gpio_pin), gpio_reg); + + if (store) + memcpy(&pf->ptp.extts_channels[chan], config, sizeof(*config)); return 0; } +/** + * ice_ptp_disable_all_extts - Disable all EXTTS channels + * @pf: Board private structure + */ +static void ice_ptp_disable_all_extts(struct ice_pf *pf) +{ + struct ice_extts_channel extts_cfg = {}; + int i; + + for (i = 0; i < pf->ptp.info.n_ext_ts; i++) { + if (pf->ptp.extts_channels[i].ena) { + extts_cfg.gpio_pin = pf->ptp.extts_channels[i].gpio_pin; + extts_cfg.ena = false; + ice_ptp_cfg_extts(pf, i, &extts_cfg, false); + } + } + + synchronize_irq(pf->oicr_irq.virq); +} + +/** + * ice_ptp_enable_all_extts - Enable all EXTTS channels + * @pf: Board private structure + * + * Called during reset to restore user configuration. + */ +static void ice_ptp_enable_all_extts(struct ice_pf *pf) +{ + int i; + + for (i = 0; i < pf->ptp.info.n_ext_ts; i++) { + if (pf->ptp.extts_channels[i].ena) + ice_ptp_cfg_extts(pf, i, &pf->ptp.extts_channels[i], + false); + } +} + /** * ice_ptp_cfg_clkout - Configure clock to generate periodic wave * @pf: Board private structure @@ -1659,6 +1709,9 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, u32 func, val, gpio_pin; u8 tmr_idx; + if (config && config->flags & ~PTP_PEROUT_PHASE) + return -EOPNOTSUPP; + tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; /* 0. 
Reset mode & out_en in AUX_OUT */ @@ -1795,17 +1848,18 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, struct ptp_clock_request *rq, int on) { struct ice_pf *pf = ptp_info_to_pf(info); - struct ice_perout_channel clk_cfg = {0}; bool sma_pres = false; unsigned int chan; u32 gpio_pin; - int err; if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) sma_pres = true; switch (rq->type) { case PTP_CLK_REQ_PEROUT: + { + struct ice_perout_channel clk_cfg = {}; + chan = rq->perout.index; if (sma_pres) { if (chan == ice_pin_desc_e810t[SMA1].chan) @@ -1825,15 +1879,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, clk_cfg.gpio_pin = chan; } + clk_cfg.flags = rq->perout.flags; clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) + rq->perout.period.nsec); clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) + rq->perout.start.nsec); clk_cfg.ena = !!on; - err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true); - break; + return ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true); + } case PTP_CLK_REQ_EXTTS: + { + struct ice_extts_channel extts_cfg = {}; + chan = rq->extts.index; if (sma_pres) { if (chan < ice_pin_desc_e810t[SMA2].chan) @@ -1849,14 +1907,15 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, gpio_pin = chan; } - err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin, - rq->extts.flags); - break; + extts_cfg.flags = rq->extts.flags; + extts_cfg.gpio_pin = gpio_pin; + extts_cfg.ena = !!on; + + return ice_ptp_cfg_extts(pf, chan, &extts_cfg, true); + } default: return -EOPNOTSUPP; } - - return err; } /** @@ -1869,26 +1928,32 @@ static int ice_ptp_gpio_enable_e823(struct ptp_clock_info *info, struct ptp_clock_request *rq, int on) { struct ice_pf *pf = ptp_info_to_pf(info); - struct ice_perout_channel clk_cfg = {0}; - int err; switch (rq->type) { case PTP_CLK_REQ_PPS: + { + struct ice_perout_channel clk_cfg = {}; + + clk_cfg.flags = rq->perout.flags; clk_cfg.gpio_pin = PPS_PIN_INDEX; clk_cfg.period = NSEC_PER_SEC; clk_cfg.ena = !!on; - err = ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true); - break; + return ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true); + } case PTP_CLK_REQ_EXTTS: - err = ice_ptp_cfg_extts(pf, !!on, rq->extts.index, - TIME_SYNC_PIN_INDEX, rq->extts.flags); - break; + { + struct ice_extts_channel extts_cfg = {}; + + extts_cfg.flags = rq->extts.flags; + extts_cfg.gpio_pin = TIME_SYNC_PIN_INDEX; + extts_cfg.ena = !!on; + + return ice_ptp_cfg_extts(pf, rq->extts.index, &extts_cfg, true); + } default: return -EOPNOTSUPP; } - - return err; } /** @@ -2720,6 +2785,10 @@ static int ice_ptp_rebuild_owner(struct ice_pf *pf) ice_ptp_restart_all_phy(pf); } + /* Re-enable all periodic outputs and external timestamp events */ + ice_ptp_enable_all_clkout(pf); + ice_ptp_enable_all_extts(pf); + return 0; } @@ -3275,6 +3344,8 @@ void ice_ptp_release(struct ice_pf *pf) ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx); + ice_ptp_disable_all_extts(pf); + kthread_cancel_delayed_work_sync(&pf->ptp.work); ice_ptp_port_phy_stop(&pf->ptp.port); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 3af20025043a6..e2af9749061ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -29,10 +29,17 @@ enum ice_ptp_pin_e810t { struct ice_perout_channel { bool ena; u32 gpio_pin; + u32 flags; u64 period; u64 start_time; }; +struct ice_extts_channel { + bool ena; + u32 gpio_pin; + u32 flags; +}; + /* The ice hardware captures Tx hardware timestamps in the PHY. 
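/*
 * Illustrative sketch, not part of the patch series: ice_ptp_cfg_extts()
 * above follows the PTP driver convention of failing with -EOPNOTSUPP for
 * any request flag outside the supported set instead of silently ignoring
 * it. Flag values mirror include/uapi/linux/ptp_clock.h:
 */
#include <errno.h>

#define DEMO_PTP_ENABLE_FEATURE	(1 << 0)
#define DEMO_PTP_RISING_EDGE	(1 << 1)
#define DEMO_PTP_FALLING_EDGE	(1 << 2)
#define DEMO_PTP_STRICT_FLAGS	(1 << 3)

static int demo_validate_extts_flags(unsigned int flags)
{
	const unsigned int supported = DEMO_PTP_ENABLE_FEATURE |
				       DEMO_PTP_RISING_EDGE |
				       DEMO_PTP_FALLING_EDGE |
				       DEMO_PTP_STRICT_FLAGS;

	return (flags & ~supported) ? -EOPNOTSUPP : 0;
}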
The timestamp * is stored in a buffer of registers. Depending on the specific hardware, * this buffer might be shared across multiple PHY ports. @@ -226,6 +233,7 @@ enum ice_ptp_state { * @ext_ts_irq: the external timestamp IRQ in use * @kworker: kwork thread for handling periodic work * @perout_channels: periodic output data + * @extts_channels: channels for external timestamps * @info: structure defining PTP hardware capabilities * @clock: pointer to registered PTP clock device * @tstamp_config: hardware timestamping configuration @@ -249,6 +257,7 @@ struct ice_ptp { u8 ext_ts_irq; struct kthread_worker *kworker; struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX]; + struct ice_extts_channel extts_channels[GLTSYN_TGT_H_IDX_MAX]; struct ptp_clock_info info; struct ptp_clock *clock; struct hwtstamp_config tstamp_config; diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 5352fee62d2b8..0b99828043708 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -217,9 +217,9 @@ ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch) if (ch->dma.irq) free_irq(ch->dma.irq, priv); if (IS_RX(ch->idx)) { - int desc; + struct ltq_dma_channel *dma = &ch->dma; - for (desc = 0; desc < LTQ_DESC_NUM; desc++) + for (dma->desc = 0; dma->desc < LTQ_DESC_NUM; dma->desc++) dev_kfree_skb_any(ch->skb[ch->dma.desc]); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4a77f6fe26220..05b84581d5c56 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1745,7 +1745,7 @@ struct cpt_lf_alloc_req_msg { u16 nix_pf_func; u16 sso_pf_func; u16 eng_grpmsk; - int blkaddr; + u8 blkaddr; u8 ctx_ilen_valid : 1; u8 ctx_ilen : 7; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index d883157393ea0..6c3aca6f278db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -63,8 +63,13 @@ enum npc_kpu_lb_ltype { NPC_LT_LB_CUSTOM1 = 0xF, }; +/* Don't modify ltypes up to IP6_EXT, otherwise length and checksum of IP + * headers may not be checked correctly. IPv4 ltypes and IPv6 ltypes must + * differ only at bit 0 so mask 0xE can be used to detect extended headers. 
+ */ enum npc_kpu_lc_ltype { - NPC_LT_LC_IP = 1, + NPC_LT_LC_PTP = 1, + NPC_LT_LC_IP, NPC_LT_LC_IP_OPT, NPC_LT_LC_IP6, NPC_LT_LC_IP6_EXT, @@ -72,7 +77,6 @@ enum npc_kpu_lc_ltype { NPC_LT_LC_RARP, NPC_LT_LC_MPLS, NPC_LT_LC_NSH, - NPC_LT_LC_PTP, NPC_LT_LC_FCOE, NPC_LT_LC_NGIO, NPC_LT_LC_CUSTOM0 = 0xE, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index ff78251f92d44..5f661e67ccbcf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1643,7 +1643,7 @@ static int rvu_check_rsrc_availability(struct rvu *rvu, if (req->ssow > block->lf.max) { dev_err(&rvu->pdev->dev, "Func 0x%x: Invalid SSOW req, %d > max %d\n", - pcifunc, req->sso, block->lf.max); + pcifunc, req->ssow, block->lf.max); return -EINVAL; } mappedlfs = rvu_get_rsrc_mapcount(pfvf, block->addr); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index f047185f38e0f..3e09d22858147 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -696,7 +696,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req, struct cpt_rd_wr_reg_msg *rsp) { - int blkaddr; + u64 offset = req->reg_offset; + int blkaddr, lf; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -707,17 +708,25 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - rsp->reg_offset = req->reg_offset; - rsp->ret_val = req->ret_val; - rsp->is_write = req->is_write; - if (!is_valid_offset(rvu, req)) return CPT_AF_ERR_ACCESS_DENIED; + /* Translate local LF used by VFs to global CPT LF */ + lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, + (offset & 0xFFF) >> 3); + + /* Translate local LF's offset to global CPT LF's offset */ + offset &= 0xFF000; + offset += lf << 3; + + rsp->reg_offset = offset; + rsp->ret_val = req->ret_val; + rsp->is_write = req->is_write; + if (req->is_write) - rvu_write64(rvu, blkaddr, req->reg_offset, req->val); + rvu_write64(rvu, blkaddr, offset, req->val); else - rsp->val = rvu_read64(rvu, blkaddr, req->reg_offset); + rsp->val = rvu_read64(rvu, blkaddr, offset); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 00af8888e3291..3dc828cf6c5a6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3864,6 +3864,11 @@ static int get_flowkey_alg_idx(struct nix_hw *nix_hw, u32 flow_cfg) return -ERANGE; } +/* Mask to match ipv6(NPC_LT_LC_IP6) and ipv6 ext(NPC_LT_LC_IP6_EXT) */ +#define NPC_LT_LC_IP6_MATCH_MSK ((~(NPC_LT_LC_IP6 ^ NPC_LT_LC_IP6_EXT)) & 0xf) +/* Mask to match both ipv4(NPC_LT_LC_IP) and ipv4 ext(NPC_LT_LC_IP_OPT) */ +#define NPC_LT_LC_IP_MATCH_MSK ((~(NPC_LT_LC_IP ^ NPC_LT_LC_IP_OPT)) & 0xf) + static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) { int idx, nr_field, key_off, field_marker, keyoff_marker; @@ -3933,7 +3938,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->hdr_offset = 9; /* offset */ field->bytesm1 = 0; /* 1 byte */ field->ltype_match = NPC_LT_LC_IP; - field->ltype_mask = 0xF; + field->ltype_mask = NPC_LT_LC_IP_MATCH_MSK; break; case NIX_FLOW_KEY_TYPE_IPV4: case NIX_FLOW_KEY_TYPE_INNR_IPV4: @@ -3960,8 +3965,7 @@ 
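/*
 * Illustrative worked example, not part of the patch series: after the enum
 * reorder above, IPv4 (2) and IPv4+options (3) differ only in bit 0, as do
 * IPv6 (4) and IPv6-ext (5), so the 0xE masks defined above let a single
 * entry match both variants of each family:
 */
#include <assert.h>

enum { LC_PTP = 1, LC_IP, LC_IP_OPT, LC_IP6, LC_IP6_EXT };

#define DEMO_IP_MSK	((~(LC_IP ^ LC_IP_OPT)) & 0xf)		/* == 0xE */
#define DEMO_IP6_MSK	((~(LC_IP6 ^ LC_IP6_EXT)) & 0xf)	/* == 0xE */

int main(void)
{
	assert(DEMO_IP_MSK == 0xE && DEMO_IP6_MSK == 0xE);
	/* both IPv4 ltypes reduce to the same masked value... */
	assert((LC_IP & DEMO_IP_MSK) == (LC_IP_OPT & DEMO_IP_MSK));
	/* ...and likewise for the two IPv6 ltypes */
	assert((LC_IP6 & DEMO_IP6_MSK) == (LC_IP6_EXT & DEMO_IP6_MSK));
	return 0;
}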
static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->bytesm1 = 3; /* DIP, 4 bytes */ } } - - field->ltype_mask = 0xF; /* Match only IPv4 */ + field->ltype_mask = NPC_LT_LC_IP_MATCH_MSK; keyoff_marker = false; break; case NIX_FLOW_KEY_TYPE_IPV6: @@ -3990,7 +3994,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->bytesm1 = 15; /* DIP,16 bytes */ } } - field->ltype_mask = 0xF; /* Match only IPv6 */ + field->ltype_mask = NPC_LT_LC_IP6_MATCH_MSK; break; case NIX_FLOW_KEY_TYPE_TCP: case NIX_FLOW_KEY_TYPE_UDP: diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 31aebeb2e2858..25989c79c92e6 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1524,6 +1524,7 @@ static int mtk_star_probe(struct platform_device *pdev) { struct device_node *of_node; struct mtk_star_priv *priv; + struct phy_device *phydev; struct net_device *ndev; struct device *dev; void __iomem *base; @@ -1649,6 +1650,12 @@ static int mtk_star_probe(struct platform_device *pdev) netif_napi_add(ndev, &priv->rx_napi, mtk_star_rx_poll); netif_napi_add_tx(ndev, &priv->tx_napi, mtk_star_tx_poll); + phydev = of_phy_find_device(priv->phy_node); + if (phydev) { + phydev->mac_managed_pm = true; + put_device(&phydev->mdio.dev); + } + return devm_register_netdev(dev, ndev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index c54fd01ea635a..3d274599015be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -989,7 +989,12 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule; struct net *net = dev_net(x->xso.dev); + u64 trailer_packets = 0, trailer_bytes = 0; + u64 replay_packets = 0, replay_bytes = 0; + u64 auth_packets = 0, auth_bytes = 0; + u64 success_packets, success_bytes; u64 packets, bytes, lastuse; + size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || @@ -999,26 +1004,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) return; if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - mlx5_fc_query_cached(ipsec_rule->auth.fc, &bytes, &packets, &lastuse); - x->stats.integrity_failed += packets; - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, packets); - - mlx5_fc_query_cached(ipsec_rule->trailer.fc, &bytes, &packets, &lastuse); - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, packets); + mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes, + &auth_packets, &lastuse); + x->stats.integrity_failed += auth_packets; + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets); + + mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes, + &trailer_packets, &lastuse); + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets); } if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) return; - mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); - x->curlft.packets += packets; - x->curlft.bytes += bytes; - if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - mlx5_fc_query_cached(ipsec_rule->replay.fc, &bytes, &packets, &lastuse); - x->stats.replay += packets; - XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, packets); + 
mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes, + &replay_packets, &lastuse); + x->stats.replay += replay_packets; + XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets); } + + mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse); + success_packets = packets - auth_packets - trailer_packets - replay_packets; + x->curlft.packets += success_packets; + /* NIC counts all bytes passed through flow steering and doesn't have + * an ability to count payload data size which is needed for SA. + * + * To overcome HW limitation, let's approximate the payload size + * by removing always available headers. + */ + headers = sizeof(struct ethhdr); + if (sa_entry->attrs.family == AF_INET) + headers += sizeof(struct iphdr); + else + headers += sizeof(struct ipv6hdr); + + success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes; + x->curlft.bytes += success_bytes - headers * success_packets; } static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a605eae56685d..eedbcba226894 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5868,6 +5868,11 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) kfree(priv->htb_qos_sq_stats[i]); kvfree(priv->htb_qos_sq_stats); + if (priv->mqprio_rl) { + mlx5e_mqprio_rl_cleanup(priv->mqprio_rl); + mlx5e_mqprio_rl_free(priv->mqprio_rl); + } + memset(priv, 0, sizeof(*priv)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5693986ae6562..ac1565c0c8afc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1197,9 +1197,7 @@ static int get_num_eqs(struct mlx5_core_dev *dev) if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev)) return 1; - max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); + max_dev_eqs = mlx5_max_eq_cap_get(dev); num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table), max_dev_eqs - MLX5_MAX_ASYNC_EQS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 50d2ea3239798..a436ce895e45a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -6,6 +6,9 @@ #include "helper.h" #include "ofld.h" +static int +acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + static bool esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) @@ -123,18 +126,31 @@ static int esw_acl_ingress_src_port_drop_create(struct mlx5_eswitch *esw, { struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *flow_rule; + bool created = false; int err = 0; + if (!vport->ingress.acl) { + err = acl_ingress_ofld_setup(esw, vport); + if (err) + return err; + created = true; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; flow_act.fg = vport->ingress.offloads.drop_grp; flow_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL, &flow_act, NULL, 0); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - goto out; + goto err_out; } vport->ingress.offloads.drop_rule = flow_rule; -out: + + return 0; +err_out: + /* Only destroy ingress acl created in this function. 
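/*
 * Illustrative arithmetic, not part of the patch series: the mlx5 IPsec
 * statistics change above approximates SA payload bytes by subtracting the
 * fixed Ethernet + IP header cost from the on-the-wire byte count, once per
 * successful packet:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_ETH_HLEN	14	/* sizeof(struct ethhdr) */
#define DEMO_IPV4_HLEN	20	/* sizeof(struct iphdr), no options */
#define DEMO_IPV6_HLEN	40	/* sizeof(struct ipv6hdr) */

static uint64_t demo_approx_sa_bytes(uint64_t bytes, uint64_t pkts, int ipv6)
{
	uint64_t hdrs = DEMO_ETH_HLEN + (ipv6 ? DEMO_IPV6_HLEN : DEMO_IPV4_HLEN);

	return bytes - hdrs * pkts;
}

int main(void)
{
	/* 100 successful IPv4 packets, 150000 counted bytes -> 146600 */
	printf("%llu\n",
	       (unsigned long long)demo_approx_sa_bytes(150000, 100, 0));
	return 0;
}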
*/ + if (created) + esw_acl_ingress_ofld_cleanup(esw, vport); return err; } @@ -299,16 +315,12 @@ static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport) } } -int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int +acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int num_ftes = 0; int err; - if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && - !esw_acl_ingress_prio_tag_enabled(esw, vport)) - return 0; - esw_acl_ingress_allow_rule_destroy(vport); if (mlx5_eswitch_vport_match_metadata_enabled(esw)) @@ -347,6 +359,15 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, return err; } +int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !esw_acl_ingress_prio_tag_enabled(esw, vport)) + return 0; + + return acl_ingress_ofld_setup(esw, vport); +} + void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 592143d5e1da1..72949cb85244d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4600,20 +4600,26 @@ mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs, return -EOPNOTSUPP; } + if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support getting the max number of EQs"); + return -EOPNOTSUPP; + } + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) return -ENOMEM; mutex_lock(&esw->state_lock); err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, - MLX5_CAP_GENERAL); + MLX5_CAP_GENERAL_2); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); goto out; } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs); + max_eqs = MLX5_GET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b); if (max_eqs < MLX5_ESW_MAX_CTRL_EQS) *max_io_eqs = 0; else @@ -4644,6 +4650,12 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, return -EOPNOTSUPP; } + if (!MLX5_CAP_GEN_2(esw->dev, max_num_eqs_24b)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support changing the max number of EQs"); + return -EOPNOTSUPP; + } + if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) { NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range"); return -EINVAL; @@ -4655,17 +4667,17 @@ mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, mutex_lock(&esw->state_lock); err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, - MLX5_CAP_GENERAL); + MLX5_CAP_GENERAL_2); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); goto out; } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs); + MLX5_SET(cmd_hca_cap_2, hca_caps, max_num_eqs_24b, max_eqs); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c38342b9f3208..a7fd18888b6e1 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -383,4 +383,14 @@ static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vpo : vport; } +static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev) +{ + if (MLX5_CAP_GEN_2(dev, max_num_eqs_24b)) + return MLX5_CAP_GEN_2(dev, max_num_eqs_24b); + + if (MLX5_CAP_GEN(dev, max_num_eqs)) + return MLX5_CAP_GEN(dev, max_num_eqs); + + return 1 << MLX5_CAP_GEN(dev, log_max_eq); +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index fb8787e30d3fa..401d390696802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -711,9 +711,7 @@ int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table) int mlx5_irq_table_create(struct mlx5_core_dev *dev) { - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); + int num_eqs = mlx5_max_eq_cap_get(dev); int total_vec; int pcif_vec; int req_vec; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c index 025e0db983feb..b032d5a4b3b84 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c @@ -1484,6 +1484,7 @@ static int mlxsw_linecard_types_init(struct mlxsw_core *mlxsw_core, vfree(types_info->data); err_data_alloc: kfree(types_info); + linecards->types_info = NULL; return err; } diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 6453c92f0fa7c..7fa1820db9cce 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -352,11 +352,11 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) netif_dbg(ks, intr, ks->netdev, "%s: txspace %d\n", __func__, tx_space); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->tx_space = tx_space; if (netif_queue_stopped(ks->netdev)) netif_wake_queue(ks->netdev); - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } if (status & IRQ_SPIBEI) { @@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_IER, ks->rc_ier); ks->queued_len = 0; + ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); @@ -635,14 +636,14 @@ static void ks8851_set_rx_mode(struct net_device *dev) /* schedule work to do the actual set of the data if needed */ - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) { memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl)); schedule_work(&ks->rxctrl_work); } - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); } static int ks8851_set_mac_address(struct net_device *dev, void *addr) @@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev, int ret; ks->netdev = netdev; - ks->tx_space = 6144; ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); ret = PTR_ERR_OR_ZERO(ks->gpio); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 670c1de966db8..3062cc0f91992 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -340,10 +340,10 @@ static void 
ks8851_tx_work(struct work_struct *work) tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); - spin_lock(&ks->statelock); + spin_lock_bh(&ks->statelock); ks->queued_len -= dequeued_len; ks->tx_space = tx_space; - spin_unlock(&ks->statelock); + spin_unlock_bh(&ks->statelock); ks8851_unlock_spi(ks, &flags); } diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index dcab638c57fe8..24c90d8f5a442 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -871,13 +871,13 @@ static void rswitch_tx_free(struct net_device *ndev) dma_rmb(); skb = gq->skbs[gq->dirty]; if (skb) { + rdev->ndev->stats.tx_packets++; + rdev->ndev->stats.tx_bytes += skb->len; dma_unmap_single(ndev->dev.parent, gq->unmap_addrs[gq->dirty], skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(gq->skbs[gq->dirty]); gq->skbs[gq->dirty] = NULL; - rdev->ndev->stats.tx_packets++; - rdev->ndev->stats.tx_bytes += skb->len; } desc->desc.die_dt = DT_EEMPTY; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 65d7370b47d57..466c4002f00d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -272,7 +272,7 @@ static const struct ethqos_emac_por emac_v4_0_0_por[] = { static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .por = emac_v4_0_0_por, - .num_por = ARRAY_SIZE(emac_v3_0_0_por), + .num_por = ARRAY_SIZE(emac_v4_0_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, .link_clk_name = "phyaux", diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b3afc7cb7d72d..c58782c41417a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7662,9 +7662,10 @@ int stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; - ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - priv->hw->hw_vlan_en = true; - + if (priv->plat->has_gmac4) { + ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + priv->hw->hw_vlan_en = true; + } if (priv->dma_cap.vlhash) { ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 7c4b6881a93fc..d1b682ce9c6db 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1959,6 +1959,7 @@ int wx_sw_init(struct wx *wx) } bitmap_zero(wx->state, WX_STATE_NBITS); + wx->misc_irq_domain = false; return 0; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 68bde91b67a05..81bedc8ee8d42 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1686,6 +1686,7 @@ static int wx_set_interrupt_capability(struct wx *wx) } pdev->irq = pci_irq_vector(pdev, 0); + wx->num_q_vectors = 1; return 0; } @@ -1996,7 +1997,8 @@ void wx_free_irq(struct wx *wx) int vector; if (!(pdev->msix_enabled)) { - free_irq(pdev->irq, wx); + if (!wx->misc_irq_domain) + free_irq(pdev->irq, wx); return; } @@ -2011,7 +2013,7 @@ void wx_free_irq(struct wx *wx) free_irq(entry->vector, q_vector); } - if (wx->mac.type == wx_mac_em) + if (!wx->misc_irq_domain) 
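/*
 * Illustrative sketch, not part of the patch series: the ks8851
 * spin_lock() -> spin_lock_bh() conversions above follow the rule that a
 * lock also taken from softirq context (the network stack's transmit path
 * here) must disable bottom halves when taken from process context, or a
 * softirq preempting the holder deadlocks on the same CPU. Kernel-style
 * skeleton with hypothetical names:
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_statelock);
static int demo_tx_space;

static void demo_update_from_process_context(int tx_space)
{
	spin_lock_bh(&demo_statelock);	/* keeps softirqs off while held */
	demo_tx_space = tx_space;
	spin_unlock_bh(&demo_statelock);
}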
free_irq(wx->msix_entry->vector, wx); } EXPORT_SYMBOL(wx_free_irq); @@ -2026,6 +2028,9 @@ int wx_setup_isb_resources(struct wx *wx) { struct pci_dev *pdev = wx->pdev; + if (wx->isb_mem) + return 0; + wx->isb_mem = dma_alloc_coherent(&pdev->dev, sizeof(u32) * 4, &wx->isb_dma, @@ -2385,7 +2390,6 @@ static void wx_free_all_tx_resources(struct wx *wx) void wx_free_resources(struct wx *wx) { - wx_free_isb_resources(wx); wx_free_all_rx_resources(wx); wx_free_all_tx_resources(wx); } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5aaf7b1fa2db9..0df7f5712b6f7 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1058,6 +1058,7 @@ struct wx { dma_addr_t isb_dma; u32 *isb_mem; u32 isb_tag[WX_ISB_MAX]; + bool misc_irq_domain; #define WX_MAX_RETA_ENTRIES 128 #define WX_RSS_INDIR_TBL_MAX 64 diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d1..af30ca0312b81 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -387,6 +387,7 @@ static int ngbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); err_free_resources: + wx_free_isb_resources(wx); wx_free_resources(wx); return err; } @@ -408,6 +409,7 @@ static int ngbe_close(struct net_device *netdev) ngbe_down(wx); wx_free_irq(wx); + wx_free_isb_resources(wx); wx_free_resources(wx); phylink_disconnect_phy(wx->phylink); wx_control_hw(wx, false); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index b3e3605d1edb3..a4cf682dca650 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -27,57 +27,19 @@ void txgbe_irq_enable(struct wx *wx, bool queues) } /** - * txgbe_intr - msi/legacy mode Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure - **/ -static irqreturn_t txgbe_intr(int __always_unused irq, void *data) -{ - struct wx_q_vector *q_vector; - struct wx *wx = data; - struct pci_dev *pdev; - u32 eicr; - - q_vector = wx->q_vector[0]; - pdev = wx->pdev; - - eicr = wx_misc_isb(wx, WX_ISB_VEC0); - if (!eicr) { - /* shared interrupt alert! - * the interrupt that we masked before the ICR read. - */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, true); - return IRQ_NONE; /* Not our interrupt */ - } - wx->isb_mem[WX_ISB_VEC0] = 0; - if (!(pdev->msi_enabled)) - wr32(wx, WX_PX_INTA, 1); - - wx->isb_mem[WX_ISB_MISC] = 0; - /* would disable interrupts here but it is auto disabled */ - napi_schedule_irqoff(&q_vector->napi); - - /* re-enable link(maybe) and non-queue interrupts, no flush. - * txgbe_poll will re-enable the queue interrupts - */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, false); - - return IRQ_HANDLED; -} - -/** - * txgbe_request_msix_irqs - Initialize MSI-X interrupts + * txgbe_request_queue_irqs - Initialize MSI-X queue interrupts * @wx: board private structure * - * Allocate MSI-X vectors and request interrupts from the kernel. + * Allocate MSI-X queue vectors and request interrupts from the kernel. 
**/ -static int txgbe_request_msix_irqs(struct wx *wx) +int txgbe_request_queue_irqs(struct wx *wx) { struct net_device *netdev = wx->netdev; int vector, err; + if (!wx->pdev->msix_enabled) + return 0; + for (vector = 0; vector < wx->num_q_vectors; vector++) { struct wx_q_vector *q_vector = wx->q_vector[vector]; struct msix_entry *entry = &wx->msix_q_entries[vector]; @@ -110,34 +72,6 @@ static int txgbe_request_msix_irqs(struct wx *wx) return err; } -/** - * txgbe_request_irq - initialize interrupts - * @wx: board private structure - * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. - **/ -int txgbe_request_irq(struct wx *wx) -{ - struct net_device *netdev = wx->netdev; - struct pci_dev *pdev = wx->pdev; - int err; - - if (pdev->msix_enabled) - err = txgbe_request_msix_irqs(wx); - else if (pdev->msi_enabled) - err = request_irq(wx->pdev->irq, &txgbe_intr, 0, - netdev->name, wx); - else - err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED, - netdev->name, wx); - - if (err) - wx_err(wx, "request_irq failed, Error %d\n", err); - - return err; -} - static int txgbe_request_gpio_irq(struct txgbe *txgbe) { txgbe->gpio_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_GPIO); @@ -177,6 +111,36 @@ static const struct irq_domain_ops txgbe_misc_irq_domain_ops = { }; static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) +{ + struct wx_q_vector *q_vector; + struct txgbe *txgbe = data; + struct wx *wx = txgbe->wx; + u32 eicr; + + if (wx->pdev->msix_enabled) + return IRQ_WAKE_THREAD; + + eicr = wx_misc_isb(wx, WX_ISB_VEC0); + if (!eicr) { + /* shared interrupt alert! + * the interrupt that we masked before the ICR read. + */ + if (netif_running(wx->netdev)) + txgbe_irq_enable(wx, true); + return IRQ_NONE; /* Not our interrupt */ + } + wx->isb_mem[WX_ISB_VEC0] = 0; + if (!(wx->pdev->msi_enabled)) + wr32(wx, WX_PX_INTA, 1); + + /* would disable interrupts here but it is auto disabled */ + q_vector = wx->q_vector[0]; + napi_schedule_irqoff(&q_vector->napi); + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) { struct txgbe *txgbe = data; struct wx *wx = txgbe->wx; @@ -223,6 +187,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) int txgbe_setup_misc_irq(struct txgbe *txgbe) { + unsigned long flags = IRQF_ONESHOT; struct wx *wx = txgbe->wx; int hwirq, err; @@ -236,14 +201,17 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) irq_create_mapping(txgbe->misc.domain, hwirq); txgbe->misc.chip = txgbe_irq_chip; - if (wx->pdev->msix_enabled) + if (wx->pdev->msix_enabled) { txgbe->misc.irq = wx->msix_entry->vector; - else + } else { txgbe->misc.irq = wx->pdev->irq; + if (!wx->pdev->msi_enabled) + flags |= IRQF_SHARED; + } - err = request_threaded_irq(txgbe->misc.irq, NULL, - txgbe_misc_irq_handle, - IRQF_ONESHOT, + err = request_threaded_irq(txgbe->misc.irq, txgbe_misc_irq_handle, + txgbe_misc_irq_thread_fn, + flags, wx->netdev->name, txgbe); if (err) goto del_misc_irq; @@ -256,6 +224,8 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe) if (err) goto free_gpio_irq; + wx->misc_irq_domain = true; + return 0; free_gpio_irq: diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h index b77945e7a0f26..e6285b94625ea 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.h @@ -2,6 +2,6 @@ /* Copyright (c) 2015 - 2024 Beijing WangXun Technology Co., Ltd. 
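/*
 * Illustrative sketch, not part of the patch series: the txgbe rework above
 * uses one threaded interrupt in which the hard handler does the minimum
 * (ack, kick NAPI) and returns IRQ_WAKE_THREAD to defer the rest;
 * IRQF_ONESHOT keeps the line masked until the thread function finishes.
 * Skeleton with hypothetical names:
 */
#include <linux/interrupt.h>

static irqreturn_t demo_hardirq(int irq, void *data)
{
	/* fast path: acknowledge the hardware, schedule polling */
	return IRQ_WAKE_THREAD;		/* run demo_thread_fn next */
}

static irqreturn_t demo_thread_fn(int irq, void *data)
{
	/* slow path: may sleep, e.g. service GPIO/link events */
	return IRQ_HANDLED;
}

static int demo_request(unsigned int irq, void *data)
{
	return request_threaded_irq(irq, demo_hardirq, demo_thread_fn,
				    IRQF_ONESHOT, "demo", data);
}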
*/ void txgbe_irq_enable(struct wx *wx, bool queues); -int txgbe_request_irq(struct wx *wx); +int txgbe_request_queue_irqs(struct wx *wx); void txgbe_free_misc_irq(struct txgbe *txgbe); int txgbe_setup_misc_irq(struct txgbe *txgbe); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 8c7a74981b907..ca74d9422065a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -294,9 +294,9 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_irq(wx); + err = txgbe_request_queue_irqs(wx); if (err) - goto err_free_isb; + goto err_free_resources; /* Notify the stack of the actual queue counts. */ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); @@ -313,8 +313,8 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); -err_free_isb: - wx_free_isb_resources(wx); +err_free_resources: + wx_free_resources(wx); err_reset: txgbe_reset(wx); @@ -729,6 +729,7 @@ static void txgbe_remove(struct pci_dev *pdev) txgbe_remove_phy(txgbe); txgbe_free_misc_irq(txgbe); + wx_free_isb_resources(wx); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index ffe7f463e16ee..ef6df0e37bea5 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -119,7 +119,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, skb->protocol = eth_type_trans(skb, ndev); skb->ip_summed = CHECKSUM_NONE; - if (__netif_rx(skb) == NET_RX_DROP) { + if (netif_rx(skb) == NET_RX_DROP) { ndev->stats.rx_errors++; ndev->stats.rx_dropped++; } else { diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h index 1c19ae74ad2b4..4830b25e6c7d3 100644 --- a/drivers/net/phy/aquantia/aquantia.h +++ b/drivers/net/phy/aquantia/aquantia.h @@ -6,6 +6,9 @@ * Author: Heiner Kallweit */ +#ifndef AQUANTIA_H +#define AQUANTIA_H + #include #include @@ -120,3 +123,5 @@ static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } #endif int aqr_firmware_load(struct phy_device *phydev); + +#endif /* AQUANTIA_H */ diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index a838b61cd844b..a35528497a576 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -748,7 +748,7 @@ static int lan87xx_cable_test_report(struct phy_device *phydev) ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, lan87xx_cable_test_report_trans(detect)); - return 0; + return phy_init_hw(phydev); } static int lan87xx_cable_test_get_status(struct phy_device *phydev, diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0a65b6d690feb..eb9acfcaeb097 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -70,6 +70,7 @@ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ #define PPP_PROTO_LEN 2 +#define PPP_LCP_HDRLEN 4 /* * An instance of /dev/ppp can be associated with either a ppp @@ -493,6 +494,15 @@ static ssize_t ppp_read(struct file *file, char __user *buf, return ret; } +static bool ppp_check_packet(struct sk_buff *skb, size_t count) +{ + /* LCP packets must include LCP header which is 4 bytes long: + * 1-byte code, 1-byte identifier, and 2-byte length.
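+ * For example (illustrative, not part of the patch): the smallest + * valid LCP write is PPP_PROTO_LEN + PPP_LCP_HDRLEN = 6 bytes, e.g. an + * Echo-Request with no data: c0 21 (PPP_LCP protocol number), 09 + * (code), one identifier byte, 00 04 (length).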
+ */ + return get_unaligned_be16(skb->data) != PPP_LCP || + count >= PPP_PROTO_LEN + PPP_LCP_HDRLEN; +} + static ssize_t ppp_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -515,6 +525,11 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, kfree_skb(skb); goto out; } + ret = -EINVAL; + if (unlikely(!ppp_check_packet(skb, count))) { + kfree_skb(skb); + goto out; + } switch (pf->kind) { case INTERFACE: diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 0ba714ca5185c..4b8528206cc8a 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -15,8 +15,8 @@ static void swap_endian(u8 *dst, const u8 *src, u8 bits) if (bits == 32) { *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); } else if (bits == 128) { - ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); - ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + ((u64 *)dst)[0] = get_unaligned_be64(src); + ((u64 *)dst)[1] = get_unaligned_be64(src + 8); } } diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 1ea4f874e367e..7eb76724b3edb 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -124,10 +124,10 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) */ static inline int wg_cpumask_next_online(int *last_cpu) { - int cpu = cpumask_next(*last_cpu, cpu_online_mask); + int cpu = cpumask_next(READ_ONCE(*last_cpu), cpu_online_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_online_mask); - *last_cpu = cpu; + WRITE_ONCE(*last_cpu, cpu); return cpu; } diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 0d48e0f4a1ba3..26e09c30d596c 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -222,7 +222,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) { struct sk_buff *skb; - if (skb_queue_empty(&peer->staged_packet_queue)) { + if (skb_queue_empty_lockless(&peer->staged_packet_queue)) { skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC); if (unlikely(!skb)) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 18ce060df9b5b..dac6155ae1bd0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -654,7 +654,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES; if (iwl_fw_lookup_cmd_ver(mvm->fw, WOWLAN_KEK_KCK_MATERIAL, - IWL_FW_CMD_VER_UNKNOWN) == 3) + IWL_FW_CMD_VER_UNKNOWN) >= 3) hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK; if (fw_has_api(&mvm->fw->ucode_capa, @@ -1656,7 +1656,8 @@ static void iwl_mvm_prevent_esr_done_wk(struct wiphy *wiphy, struct iwl_mvm_vif *mvmvif = container_of(wk, struct iwl_mvm_vif, prevent_esr_done_wk.work); struct iwl_mvm *mvm = mvmvif->mvm; - struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); + struct ieee80211_vif *vif = + container_of((void *)mvmvif, struct ieee80211_vif, drv_priv); mutex_lock(&mvm->mutex); iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_PREVENTION); @@ -1682,7 +1683,8 @@ static void iwl_mvm_unblock_esr_tpt(struct wiphy *wiphy, struct wiphy_work *wk) struct iwl_mvm_vif *mvmvif = container_of(wk, struct iwl_mvm_vif, unblock_esr_tpt_wk); struct iwl_mvm *mvm = mvmvif->mvm; - struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); + struct ieee80211_vif *vif = + container_of((void *)mvmvif, 
struct ieee80211_vif, drv_priv); mutex_lock(&mvm->mutex); iwl_mvm_unblock_esr(mvm, vif, IWL_MVM_ESR_BLOCKED_TPT); @@ -6410,11 +6412,9 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, if (sync) { lockdep_assert_held(&mvm->mutex); ret = wait_event_timeout(mvm->rx_sync_waitq, - READ_ONCE(mvm->queue_sync_state) == 0 || - iwl_mvm_is_radio_hw_killed(mvm), + READ_ONCE(mvm->queue_sync_state) == 0, SYNC_RX_QUEUE_TIMEOUT); - WARN_ONCE(!ret && !iwl_mvm_is_radio_hw_killed(mvm), - "queue sync: failed to sync, state is 0x%lx, cookie %d\n", + WARN_ONCE(!ret, "queue sync: failed to sync, state is 0x%lx, cookie %d\n", mvm->queue_sync_state, mvm->queue_sync_cookie); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 53283d052e189..d343432474db0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -153,7 +153,7 @@ static void iwl_mvm_rx_esr_mode_notif(struct iwl_mvm *mvm, struct ieee80211_vif *vif = iwl_mvm_get_bss_vif(mvm); /* FW recommendations is only for entering EMLSR */ - if (!vif || iwl_mvm_vif_from_mac80211(vif)->esr_active) + if (IS_ERR_OR_NULL(vif) || iwl_mvm_vif_from_mac80211(vif)->esr_active) return; if (le32_to_cpu(notif->action) == ESR_RECOMMEND_ENTER) @@ -1912,12 +1912,10 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state) bool rfkill_safe_init_done = READ_ONCE(mvm->rfkill_safe_init_done); bool unified = iwl_mvm_has_unified_ucode(mvm); - if (state) { + if (state) set_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); - wake_up(&mvm->rx_sync_waitq); - } else { + else clear_bit(IWL_MVM_STATUS_HW_RFKILL, &mvm->status); - } iwl_mvm_set_rfkill_state(mvm); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 4fa8066a89b6a..6e933907f9852 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -557,12 +557,10 @@ struct iwl_mvm_stat_data_all_macs { }; static void iwl_mvm_update_link_sig(struct ieee80211_vif *vif, int sig, - struct iwl_mvm_vif_link_info *link_info) + struct iwl_mvm_vif_link_info *link_info, + struct ieee80211_bss_conf *bss_conf) { struct iwl_mvm *mvm = iwl_mvm_vif_from_mac80211(vif)->mvm; - struct ieee80211_bss_conf *bss_conf = - iwl_mvm_rcu_fw_link_id_to_link_conf(mvm, link_info->fw_link_id, - false); int thold = bss_conf->cqm_rssi_thold; int hyst = bss_conf->cqm_rssi_hyst; int last_event; @@ -670,7 +668,7 @@ static void iwl_mvm_stat_iterator(void *_data, u8 *mac, mvmvif->deflink.beacon_stats.num_beacons; /* This is used in pre-MLO API so use deflink */ - iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink); + iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf); } static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, @@ -705,7 +703,7 @@ static void iwl_mvm_stat_iterator_all_macs(void *_data, u8 *mac, sig = -le32_to_cpu(mac_stats->beacon_filter_average_energy); /* This is used in pre-MLO API so use deflink */ - iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink); + iwl_mvm_update_link_sig(vif, sig, &mvmvif->deflink, &vif->bss_conf); } static inline void @@ -921,7 +919,8 @@ iwl_mvm_stat_iterator_all_links(struct iwl_mvm *mvm, mvmvif->link[link_id]->beacon_stats.num_beacons; sig = -le32_to_cpu(link_stats->beacon_filter_average_energy); - iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info); + iwl_mvm_update_link_sig(bss_conf->vif, sig, link_info, + bss_conf); if (WARN_ONCE(mvmvif->id >= 
MAC_INDEX_AUX, "invalid mvmvif id: %d", mvmvif->id)) @@ -967,7 +966,7 @@ static void iwl_mvm_update_esr_mode_tpt(struct iwl_mvm *mvm) lockdep_assert_held(&mvm->mutex); - if (!bss_vif) + if (IS_ERR_OR_NULL(bss_vif)) return; mvmvif = iwl_mvm_vif_from_mac80211(bss_vif); diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index f1085ccb7eedc..7719e4f3e2a23 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -382,7 +382,8 @@ wilc_parse_join_bss_param(struct cfg80211_bss *bss, struct ieee80211_p2p_noa_attr noa_attr; const struct cfg80211_bss_ies *ies; struct wilc_join_bss_param *param; - u8 rates_len = 0, ies_len; + u8 rates_len = 0; + int ies_len; int ret; param = kzalloc(sizeof(*param), GFP_KERNEL); diff --git a/drivers/net/wireless/ti/wlcore/cmd.c b/drivers/net/wireless/ti/wlcore/cmd.c index a939fd89a7f5e..92fc2d456c2c4 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.c +++ b/drivers/net/wireless/ti/wlcore/cmd.c @@ -1566,13 +1566,6 @@ int wl12xx_cmd_add_peer(struct wl1271 *wl, struct wl12xx_vif *wlvif, cpu_to_le32(wl1271_tx_enabled_rates_get(wl, sta_rates, wlvif->band)); - if (!cmd->supported_rates) { - wl1271_debug(DEBUG_CMD, - "peer has no supported rates yet, configuring basic rates: 0x%x", - wlvif->basic_rate_set); - cmd->supported_rates = cpu_to_le32(wlvif->basic_rate_set); - } - wl1271_debug(DEBUG_CMD, "new peer rates=0x%x queues=0x%x", cmd->supported_rates, sta->uapsd_queues); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ef12169f8044d..492cd7aef44f1 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5139,19 +5139,23 @@ static int wl12xx_update_sta_state(struct wl1271 *wl, /* Add station (AP mode) */ if (is_ap && - old_state == IEEE80211_STA_NOTEXIST && - new_state == IEEE80211_STA_NONE) { + old_state == IEEE80211_STA_AUTH && + new_state == IEEE80211_STA_ASSOC) { ret = wl12xx_sta_add(wl, wlvif, sta); if (ret) return ret; + wl_sta->fw_added = true; + wlcore_update_inconn_sta(wl, wlvif, wl_sta, true); } /* Remove station (AP mode) */ if (is_ap && - old_state == IEEE80211_STA_NONE && - new_state == IEEE80211_STA_NOTEXIST) { + old_state == IEEE80211_STA_ASSOC && + new_state == IEEE80211_STA_AUTH) { + wl_sta->fw_added = false; + /* must not fail */ wl12xx_sta_remove(wl, wlvif, sta); @@ -5165,11 +5169,6 @@ static int wl12xx_update_sta_state(struct wl1271 *wl, if (ret < 0) return ret; - /* reconfigure rates */ - ret = wl12xx_cmd_add_peer(wl, wlvif, sta, wl_sta->hlid); - if (ret < 0) - return ret; - ret = wl1271_acx_set_ht_capabilities(wl, &sta->deflink.ht_cap, true, wl_sta->hlid); diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index 7bd3ce2f08044..464587d16ab20 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -140,11 +140,8 @@ EXPORT_SYMBOL(wl12xx_is_dummy_packet); static u8 wl12xx_tx_get_hlid_ap(struct wl1271 *wl, struct wl12xx_vif *wlvif, struct sk_buff *skb, struct ieee80211_sta *sta) { - if (sta) { - struct wl1271_station *wl_sta; - - wl_sta = (struct wl1271_station *)sta->drv_priv; - return wl_sta->hlid; + if (sta && wl1271_station(sta)->fw_added) { + return wl1271_station(sta)->hlid; } else { struct ieee80211_hdr *hdr; diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index eefae3f867b9f..817a8a61cac6f 100644 --- 
a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -324,6 +324,7 @@ struct wl12xx_rx_filter { struct wl1271_station { u8 hlid; + bool fw_added; bool in_connection; /* @@ -335,6 +336,11 @@ struct wl1271_station { u64 total_freed_pkts; }; +static inline struct wl1271_station *wl1271_station(struct ieee80211_sta *sta) +{ + return (struct wl1271_station *)sta->drv_priv; +} + struct wl12xx_vif { struct wl1271 *wl; struct list_head list; diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index e1ec3b7200d7b..f8dd7eb40fbe1 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -396,10 +396,9 @@ static int nvmem_sysfs_setup_compat(struct nvmem_device *nvmem, if (!config->base_dev) return -EINVAL; - if (config->type == NVMEM_TYPE_FRAM) - bin_attr_nvmem_eeprom_compat.attr.name = "fram"; - nvmem->eeprom = bin_attr_nvmem_eeprom_compat; + if (config->type == NVMEM_TYPE_FRAM) + nvmem->eeprom.attr.name = "fram"; nvmem->eeprom.attr.mode = nvmem_bin_attr_get_umode(nvmem); nvmem->eeprom.size = nvmem->size; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -463,7 +462,7 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem) "%s@%x,%x", entry->name, entry->offset, entry->bit_offset); - attrs[i].attr.mode = 0444; + attrs[i].attr.mode = 0444 & nvmem_bin_attr_get_umode(nvmem); attrs[i].size = entry->bytes; attrs[i].read = &nvmem_cell_attr_read; attrs[i].private = entry; diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c index 33678d0af2c24..6c2f80e166e28 100644 --- a/drivers/nvmem/meson-efuse.c +++ b/drivers/nvmem/meson-efuse.c @@ -18,18 +18,24 @@ static int meson_efuse_read(void *context, unsigned int offset, void *val, size_t bytes) { struct meson_sm_firmware *fw = context; + int ret; - return meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset, - bytes, 0, 0, 0); + ret = meson_sm_call_read(fw, (u8 *)val, bytes, SM_EFUSE_READ, offset, + bytes, 0, 0, 0); + + return ret < 0 ? ret : 0; } static int meson_efuse_write(void *context, unsigned int offset, void *val, size_t bytes) { struct meson_sm_firmware *fw = context; + int ret; + + ret = meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset, + bytes, 0, 0, 0); - return meson_sm_call_write(fw, (u8 *)val, bytes, SM_EFUSE_WRITE, offset, - bytes, 0, 0, 0); + return ret < 0 ? ret : 0; } static const struct of_device_id meson_efuse_match[] = { diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index 752d0bf4445ee..7f907c5a445e7 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -46,7 +46,10 @@ static int rmem_read(void *context, unsigned int offset, memunmap(addr); - return count; + if (count < 0) + return count; + + return count == bytes ? 0 : -EIO; } static int rmem_probe(struct platform_device *pdev) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 462375b293e47..c94203ce65bb3 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -81,7 +81,8 @@ EXPORT_SYMBOL_GPL(of_irq_find_parent); /* * These interrupt controllers abuse interrupt-map for unspeakable * reasons and rely on the core code to *ignore* it (the drivers do - * their own parsing of the property). + * their own parsing of the property). The PAsemi entry covers a + * non-sensical interrupt-map that is better left ignored. * * If you think of adding to the list for something *new*, think * again. 
There is a high chance that you will be sent back to the @@ -95,6 +96,7 @@ static const char * const of_irq_imap_abusers[] = { "fsl,ls1043a-extirq", "fsl,ls1088a-extirq", "renesas,rza1-irqc", + "pasemi,rootbus", NULL, }; @@ -293,20 +295,8 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) imaplen -= imap - oldimap; pr_debug(" -> imaplen=%d\n", imaplen); } - if (!match) { - if (intc) { - /* - * The PASEMI Nemo is a known offender, so - * let's only warn for anyone else. - */ - WARN(!IS_ENABLED(CONFIG_PPC_PASEMI), - "%pOF interrupt-map failed, using interrupt-controller\n", - ipar); - return 0; - } - + if (!match) goto fail; - } /* * Successfully parsed an interrupt-map translation; copy new diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 78c490e0505af..0a02e85a89513 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -167,7 +167,7 @@ u64 riscv_pmu_event_update(struct perf_event *event) unsigned long cmask; u64 oldval, delta; - if (!rvpmu->ctr_read) + if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE)) return 0; cmask = riscv_pmu_ctr_get_width_mask(event); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index a2e4005e1fd01..4e842dcedfbaa 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,7 @@ struct sbi_pmu_event_data { }; }; -static const struct sbi_pmu_event_data pmu_hw_event_map[] = { +static struct sbi_pmu_event_data pmu_hw_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = {.hw_gen_event = { SBI_PMU_HW_CPU_CYCLES, SBI_PMU_EVENT_TYPE_HW, 0}}, @@ -148,7 +149,7 @@ static const struct sbi_pmu_event_data pmu_hw_event_map[] = { }; #define C(x) PERF_COUNT_HW_CACHE_##x -static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] +static struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { [C(L1D)] = { @@ -293,6 +294,34 @@ static const struct sbi_pmu_event_data pmu_cache_event_map[PERF_COUNT_HW_CACHE_M }, }; +static void pmu_sbi_check_event(struct sbi_pmu_event_data *edata) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + 0, cmask, 0, edata->event_idx, 0, 0); + if (!ret.error) { + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, + ret.value, 0x1, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + } else if (ret.error == SBI_ERR_NOT_SUPPORTED) { + /* This event cannot be monitored by any counter */ + edata->event_idx = -EINVAL; + } +} + +static void pmu_sbi_check_std_events(struct work_struct *work) +{ + for (int i = 0; i < ARRAY_SIZE(pmu_hw_event_map); i++) + pmu_sbi_check_event(&pmu_hw_event_map[i]); + + for (int i = 0; i < ARRAY_SIZE(pmu_cache_event_map); i++) + for (int j = 0; j < ARRAY_SIZE(pmu_cache_event_map[i]); j++) + for (int k = 0; k < ARRAY_SIZE(pmu_cache_event_map[i][j]); k++) + pmu_sbi_check_event(&pmu_cache_event_map[i][j][k]); +} + +static DECLARE_WORK(check_std_events_work, pmu_sbi_check_std_events); + static int pmu_sbi_ctr_get_width(int idx) { return pmu_ctr_list[idx].width; @@ -478,6 +507,12 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig) u64 raw_config_val; int ret; + /* + * Ensure we are finished checking standard hardware events for + * validity before allowing userspace to configure any events. 
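+ * (flush_work() returns immediately once the scheduled scan has + * completed, so only event configuration racing with the probe-time + * check ever blocks here.)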
+ */ + flush_work(&check_std_events_work); + switch (type) { case PERF_TYPE_HARDWARE: if (config >= PERF_COUNT_HW_MAX) @@ -762,7 +797,7 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu) * which may include counters that are not enabled yet. */ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, - 0, pmu->cmask, 0, 0, 0, 0); + 0, pmu->cmask, SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); } static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu) @@ -1359,6 +1394,9 @@ static int pmu_sbi_device_probe(struct platform_device *pdev) if (ret) goto out_unregister; + /* Asynchronously check which standard events are available */ + schedule_work(&check_std_events_work); + return 0; out_unregister: diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a3d881f6e5d90..87f0af3ff4b3a 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -597,6 +597,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev, val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC); break; case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_1AH); break; default: @@ -630,6 +631,7 @@ static bool amd_pmc_is_stb_supported(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return true; default: return false; @@ -764,6 +766,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return MSG_OS_HINT_RN; } return -EINVAL; @@ -967,6 +970,7 @@ static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index 9e32d3128c3a2..f1166d15c8562 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -67,6 +67,7 @@ void amd_mp2_stb_deinit(struct amd_pmc_dev *dev); #define AMD_CPU_ID_PS 0x14E8 #define AMD_CPU_ID_SP 0x14A4 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PCI_DEVICE_ID_AMD_MP2_STB 0x172c #endif /* PMC_H */ diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 3a8d8df891864..78a5aac2dcfd6 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -3271,7 +3271,7 @@ static const char *find_hci_method(acpi_handle handle) */ #define QUIRK_HCI_HOTKEY_QUICKSTART BIT(1) -static const struct dmi_system_id toshiba_dmi_quirks[] = { +static const struct dmi_system_id toshiba_dmi_quirks[] __initconst = { { /* Toshiba Portégé R700 */ /* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */ @@ -3299,6 +3299,7 @@ static const struct dmi_system_id toshiba_dmi_quirks[] = { }, .driver_data = (void *)(QUIRK_TURN_ON_PANEL_ON_RESUME | QUIRK_HCI_HOTKEY_QUICKSTART), }, + { } }; static int toshiba_acpi_add(struct acpi_device *acpi_dev) @@ -3306,8 +3307,6 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev) struct toshiba_acpi_dev *dev; const char *hci_method; u32 dummy; - const struct dmi_system_id *dmi_id; - long quirks = 0; int ret = 0; if (toshiba_acpi) @@ -3460,16 +3459,6 @@ static 
int toshiba_acpi_add(struct acpi_device *acpi_dev) } #endif - dmi_id = dmi_first_match(toshiba_dmi_quirks); - if (dmi_id) - quirks = (long)dmi_id->driver_data; - - if (turn_on_panel_on_resume == -1) - turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME); - - if (hci_hotkey_quickstart == -1) - hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART); - toshiba_wwan_available(dev); if (dev->wwan_supported) toshiba_acpi_setup_wwan_rfkill(dev); @@ -3618,10 +3607,27 @@ static struct acpi_driver toshiba_acpi_driver = { .drv.pm = &toshiba_acpi_pm, }; +static void __init toshiba_dmi_init(void) +{ + const struct dmi_system_id *dmi_id; + long quirks = 0; + + dmi_id = dmi_first_match(toshiba_dmi_quirks); + if (dmi_id) + quirks = (long)dmi_id->driver_data; + + if (turn_on_panel_on_resume == -1) + turn_on_panel_on_resume = !!(quirks & QUIRK_TURN_ON_PANEL_ON_RESUME); + + if (hci_hotkey_quickstart == -1) + hci_hotkey_quickstart = !!(quirks & QUIRK_HCI_HOTKEY_QUICKSTART); +} + static int __init toshiba_acpi_init(void) { int ret; + toshiba_dmi_init(); toshiba_proc_dir = proc_mkdir(PROC_TOSHIBA, acpi_root_dir); if (!toshiba_proc_dir) { pr_err("Unable to create proc dir " PROC_TOSHIBA "\n"); diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c index de9121ef4216b..d2cb4271a1cad 100644 --- a/drivers/pmdomain/qcom/rpmhpd.c +++ b/drivers/pmdomain/qcom/rpmhpd.c @@ -40,6 +40,7 @@ * @addr: Resource address as looped up using resource name from * cmd-db * @state_synced: Indicator that sync_state has been invoked for the rpmhpd resource + * @skip_retention_level: Indicate that retention level should not be used for the power domain */ struct rpmhpd { struct device *dev; @@ -56,6 +57,7 @@ struct rpmhpd { const char *res_name; u32 addr; bool state_synced; + bool skip_retention_level; }; struct rpmhpd_desc { @@ -173,6 +175,7 @@ static struct rpmhpd mxc = { .pd = { .name = "mxc", }, .peer = &mxc_ao, .res_name = "mxc.lvl", + .skip_retention_level = true, }; static struct rpmhpd mxc_ao = { @@ -180,6 +183,7 @@ static struct rpmhpd mxc_ao = { .active_only = true, .peer = &mxc, .res_name = "mxc.lvl", + .skip_retention_level = true, }; static struct rpmhpd nsp = { @@ -819,6 +823,9 @@ static int rpmhpd_update_level_mapping(struct rpmhpd *rpmhpd) return -EINVAL; for (i = 0; i < rpmhpd->level_count; i++) { + if (rpmhpd->skip_retention_level && buf[i] == RPMH_REGULATOR_LEVEL_RETENTION) + continue; + rpmhpd->level[i] = buf[i]; /* Remember the first corner with non-zero level */ diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index 7112f59326095..6bb5d9e372e4c 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -68,6 +68,7 @@ config RESET_BRCMSTB_RESCAL config RESET_GPIO tristate "GPIO reset controller" + depends on GPIOLIB help This enables a generic reset controller for resets attached via GPIOs. 
Typically for OF platforms this driver expects "reset-gpios" diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c index 5c3267acd2b1c..65aa5ff5ed82c 100644 --- a/drivers/reset/hisilicon/hi6220_reset.c +++ b/drivers/reset/hisilicon/hi6220_reset.c @@ -219,4 +219,5 @@ static int __init hi6220_reset_init(void) postcore_initcall(hi6220_reset_init); +MODULE_DESCRIPTION("Hisilicon Hi6220 reset controller driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 2f16f543079b4..a76c6af9ea638 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4906,7 +4906,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)dma32_to_virt(ccw->cda)); + cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda))); else cda = dma32_to_virt(ccw->cda); if (dst != cda) { @@ -5525,7 +5525,7 @@ dasd_eckd_dump_ccw_range(struct dasd_device *device, struct ccw1 *from, /* get pointer to data (consider IDALs) */ if (from->flags & CCW_FLAG_IDA) - datap = (char *)*((addr_t *)dma32_to_virt(from->cda)); + datap = dma64_to_virt(*((dma64_t *)dma32_to_virt(from->cda))); else datap = dma32_to_virt(from->cda); diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 361e9bd752570..9f2023a077c20 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -585,7 +585,7 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) ccw++; if (dst) { if (ccw->flags & CCW_FLAG_IDA) - cda = *((char **)dma32_to_virt(ccw->cda)); + cda = dma64_to_virt(*((dma64_t *)dma32_to_virt(ccw->cda))); else cda = dma32_to_virt(ccw->cda); if (dst != cda) { diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 6e5c508b1e07c..5f6e102256276 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -490,13 +490,14 @@ static int ccwchain_fetch_tic(struct ccw1 *ccw, struct channel_program *cp) { struct ccwchain *iter; - u32 cda, ccw_head; + u32 offset, ccw_head; list_for_each_entry(iter, &cp->ccwchain_list, next) { ccw_head = iter->ch_iova; if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) { - cda = (u64)iter->ch_ccw + dma32_to_u32(ccw->cda) - ccw_head; - ccw->cda = u32_to_dma32(cda); + /* Calculate offset of TIC target */ + offset = dma32_to_u32(ccw->cda) - ccw_head; + ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset); return 0; } } @@ -914,7 +915,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) * in the ioctl directly. Path status changes etc. */ list_for_each_entry(chain, &cp->ccwchain_list, next) { - ccw_head = (u32)(u64)chain->ch_ccw; + ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw)); /* * On successful execution, cpa points just beyond the end * of the chain. diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 85948963fb97a..03d6ec1eb970b 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -145,6 +145,20 @@ static inline void sas_fail_probe(struct domain_device *dev, const char *func, i func, dev->parent ? 
"exp-attached" : "direct-attached", SAS_ADDR(dev->sas_addr), err); + + /* + * If the device probe failed, the expander phy attached address + * needs to be reset so that the phy will not be treated as flutter + * in the next revalidation + */ + if (dev->parent && !dev_is_expander(dev->dev_type)) { + struct sas_phy *phy = dev->phy; + struct domain_device *parent = dev->parent; + struct ex_phy *ex_phy = &parent->ex_dev.ex_phy[phy->number]; + + memset(ex_phy->attached_sas_addr, 0, SAS_ADDR_SIZE); + } + sas_unregister_dev(dev->port, dev); } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index acf0592d63dae..91f022fb8d0ce 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -926,6 +926,7 @@ static const int device_qfull_result = static const int condition_met_result = SAM_STAT_CONDITION_MET; static struct dentry *sdebug_debugfs_root; +static ASYNC_DOMAIN_EXCLUSIVE(sdebug_async_domain); static void sdebug_err_free(struct rcu_head *head) { @@ -1148,6 +1149,8 @@ static int sdebug_target_alloc(struct scsi_target *starget) if (!targetip) return -ENOMEM; + async_synchronize_full_domain(&sdebug_async_domain); + targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev), sdebug_debugfs_root); @@ -1174,7 +1177,8 @@ static void sdebug_target_destroy(struct scsi_target *starget) targetip = (struct sdebug_target_info *)starget->hostdata; if (targetip) { starget->hostdata = NULL; - async_schedule(sdebug_tartget_cleanup_async, targetip); + async_schedule_domain(sdebug_tartget_cleanup_async, targetip, + &sdebug_async_domain); } } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6b64af7d49273..1b7561abe05d9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4119,8 +4119,6 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4137,12 +4135,13 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig index e6ba3573a7729..f3f8696395886 100644 --- a/drivers/soc/litex/Kconfig +++ b/drivers/soc/litex/Kconfig @@ -7,7 +7,7 @@ config LITEX config LITEX_SOC_CONTROLLER tristate "Enable LiteX SoC Controller driver" - depends on OF || COMPILE_TEST + depends on OF depends on HAS_IOMEM select LITEX help diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c index 10813299aa106..72c44119dd541 100644 --- a/drivers/soc/litex/litex_soc_ctrl.c +++ b/drivers/soc/litex/litex_soc_ctrl.c @@ -82,13 +82,11 @@ static int litex_reset_handler(struct notifier_block *this, unsigned long mode, return NOTIFY_DONE; } -#ifdef CONFIG_OF static const struct of_device_id litex_soc_ctrl_of_match[] = { {.compatible = "litex,soc-controller"}, {}, }; MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match); -#endif /* CONFIG_OF */ static int litex_soc_ctrl_probe(struct platform_device *pdev) { @@ -130,7 +128,7 @@ static void litex_soc_ctrl_remove(struct platform_device *pdev) static struct platform_driver litex_soc_ctrl_driver = { .driver = { .name = 
"litex-soc-controller", - .of_match_table = of_match_ptr(litex_soc_ctrl_of_match) + .of_match_table = litex_soc_ctrl_of_match, }, .probe = litex_soc_ctrl_probe, .remove_new = litex_soc_ctrl_remove, diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index 40fb09d69014c..65279243072c3 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -348,11 +348,15 @@ static void pmic_glink_remove(struct platform_device *pdev) mutex_unlock(&__pmic_glink_lock); } +static const unsigned long pmic_glink_sc8280xp_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) | + BIT(PMIC_GLINK_CLIENT_ALTMODE); + static const unsigned long pmic_glink_sm8450_client_mask = BIT(PMIC_GLINK_CLIENT_BATT) | BIT(PMIC_GLINK_CLIENT_ALTMODE) | BIT(PMIC_GLINK_CLIENT_UCSI); static const struct of_device_id pmic_glink_of_match[] = { + { .compatible = "qcom,sc8280xp-pmic-glink", .data = &pmic_glink_sc8280xp_client_mask }, { .compatible = "qcom,pmic-glink", .data = &pmic_glink_sm8450_client_mask }, {} }; diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index e358ac5b45097..96a524772549e 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -164,16 +164,20 @@ static void spi_engine_gen_xfer(struct spi_engine_program *p, bool dry, } static void spi_engine_gen_sleep(struct spi_engine_program *p, bool dry, - int delay_ns, u32 sclk_hz) + int delay_ns, int inst_ns, u32 sclk_hz) { unsigned int t; - /* negative delay indicates error, e.g. from spi_delay_to_ns() */ - if (delay_ns <= 0) + /* + * Negative delay indicates error, e.g. from spi_delay_to_ns(). And if + * delay is less that the instruction execution time, there is no need + * for an extra sleep instruction since the instruction execution time + * will already cover the required delay. + */ + if (delay_ns < 0 || delay_ns <= inst_ns) return; - /* rounding down since executing the instruction adds a couple of ticks delay */ - t = DIV_ROUND_DOWN_ULL((u64)delay_ns * sclk_hz, NSEC_PER_SEC); + t = DIV_ROUND_UP_ULL((u64)(delay_ns - inst_ns) * sclk_hz, NSEC_PER_SEC); while (t) { unsigned int n = min(t, 256U); @@ -220,10 +224,16 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, struct spi_device *spi = msg->spi; struct spi_controller *host = spi->controller; struct spi_transfer *xfer; - int clk_div, new_clk_div; + int clk_div, new_clk_div, inst_ns; bool keep_cs = false; u8 bits_per_word = 0; + /* + * Take into account instruction execution time for more accurate sleep + * times, especially when the delay is small. 
+ */ + inst_ns = DIV_ROUND_UP(NSEC_PER_SEC, host->max_speed_hz); + clk_div = 1; spi_engine_program_add_cmd(p, dry, @@ -252,7 +262,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, spi_engine_gen_xfer(p, dry, xfer); spi_engine_gen_sleep(p, dry, spi_delay_to_ns(&xfer->delay, xfer), - xfer->effective_speed_hz); + inst_ns, xfer->effective_speed_hz); if (xfer->cs_change) { if (list_is_last(&xfer->transfer_list, &msg->transfers)) { @@ -262,7 +272,7 @@ static void spi_engine_compile_message(struct spi_message *msg, bool dry, spi_engine_gen_cs(p, dry, spi, false); spi_engine_gen_sleep(p, dry, spi_delay_to_ns( - &xfer->cs_change_delay, xfer), + &xfer->cs_change_delay, xfer), inst_ns, xfer->effective_speed_hz); if (!list_next_entry(xfer, transfer_list)->cs_off) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index be3998104bfbb..f7e8b5efa50e5 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -984,6 +984,9 @@ static int davinci_spi_probe(struct platform_device *pdev) return ret; free_dma: + /* This bit needs to be cleared to disable dspi->clk */ + clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); + if (dspi->dma_rx) { dma_release_channel(dspi->dma_rx); dma_release_channel(dspi->dma_tx); @@ -1013,6 +1016,9 @@ static void davinci_spi_remove(struct platform_device *pdev) spi_bitbang_stop(&dspi->bitbang); + /* This bit needs to be cleared to disable dspi->clk */ + clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); + if (dspi->dma_rx) { dma_release_channel(dspi->dma_rx); dma_release_channel(dspi->dma_tx); diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 33164ebdb5831..1439883326cfe 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1050,7 +1050,7 @@ static struct spi_imx_devtype_data imx35_cspi_devtype_data = { .rx_available = mx31_rx_available, .reset = mx31_reset, .fifo_size = 8, - .has_dmamode = true, + .has_dmamode = false, .dynamic_burst = false, .has_targetmode = false, .devtype = IMX35_CSPI, diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index 5d72e3d59df83..c02c4204442f5 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -158,12 +158,14 @@ static int spi_mux_probe(struct spi_device *spi) /* supported modes are the same as our parent's */ ctlr->mode_bits = spi->controller->mode_bits; ctlr->flags = spi->controller->flags; + ctlr->bits_per_word_mask = spi->controller->bits_per_word_mask; ctlr->transfer_one_message = spi_mux_transfer_one_message; ctlr->setup = spi_mux_setup; ctlr->num_chipselect = mux_control_states(priv->mux); ctlr->bus_num = -1; ctlr->dev.of_node = spi->dev.of_node; ctlr->must_async = true; + ctlr->defer_optimize_message = true; ret = devm_spi_register_controller(&spi->dev, ctlr); if (ret) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 7e3083b835348..002f29dbcea6e 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1277,24 +1277,11 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, /* * Check if this transfer contains only one word; - * OR contains 1 to 4 words, with bits_per_word == 8 and no delay between each word - * OR contains 1 to 2 words, with bits_per_word == 16 and no delay between each word - * - * If one of the two last case is true, this also change the bits_per_word of this - * transfer to make it a bit faster.
- * It's not an issue to change the bits_per_word here even if the multi-mode is not - * applicable for this message, the signal on the wire will be the same. */ if (bits_per_word < 8 && tr->len == 1) { /* multi-mode is applicable, only one word (1..7 bits) */ - } else if (tr->word_delay.value == 0 && bits_per_word == 8 && tr->len <= 4) { - /* multi-mode is applicable, only one "bigger" word (8,16,24,32 bits) */ - tr->bits_per_word = tr->len * bits_per_word; - } else if (tr->word_delay.value == 0 && bits_per_word == 16 && tr->len <= 2) { - /* multi-mode is applicable, only one "bigger" word (16,32 bits) */ - tr->bits_per_word = tr->len * bits_per_word / 2; } else if (bits_per_word >= 8 && tr->len == bits_per_word / 8) { - /* multi-mode is applicable, only one word (9..15,17..32 bits) */ + /* multi-mode is applicable, only one word (8..32 bits) */ } else { /* multi-mode is not applicable: more than one word in the transfer */ mcspi->use_multi_mode = false; diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index fc13fa1921895..0f04e832f9ec2 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2151,7 +2151,8 @@ static void __spi_unoptimize_message(struct spi_message *msg) */ static void spi_maybe_unoptimize_message(struct spi_message *msg) { - if (!msg->pre_optimized && msg->optimized) + if (!msg->pre_optimized && msg->optimized && + !msg->spi->controller->defer_optimize_message) __spi_unoptimize_message(msg); } @@ -4294,6 +4295,11 @@ static int __spi_optimize_message(struct spi_device *spi, static int spi_maybe_optimize_message(struct spi_device *spi, struct spi_message *msg) { + if (spi->controller->defer_optimize_message) { + msg->spi = spi; + return 0; + } + if (msg->pre_optimized) return 0; @@ -4324,6 +4330,13 @@ int spi_optimize_message(struct spi_device *spi, struct spi_message *msg) { int ret; + /* + * Pre-optimization is not supported and optimization is deferred e.g. + * when using spi-mux. 
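+ * (In that case the message is optimized when spi-mux resubmits it to + * the parent controller, whose defer_optimize_message flag is not + * set.)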
+ */ + if (spi->controller->defer_optimize_message) + return 0; + ret = __spi_optimize_message(spi, msg); if (ret) return ret; @@ -4350,6 +4363,9 @@ EXPORT_SYMBOL_GPL(spi_optimize_message); */ void spi_unoptimize_message(struct spi_message *msg) { + if (msg->spi->controller->defer_optimize_message) + return; + __spi_unoptimize_message(msg); msg->pre_optimized = false; } @@ -4432,8 +4448,6 @@ int spi_async(struct spi_device *spi, struct spi_message *message) spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags); - spi_maybe_unoptimize_message(message); - return ret; } EXPORT_SYMBOL_GPL(spi_async); diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index 3235e1c719e84..3e73efa51bba0 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -660,7 +660,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev, const struct ffa_ops *ops) { const struct ffa_msg_ops *msg_ops = ops->msg_ops; - struct ffa_send_direct_data data = { OPTEE_FFA_GET_API_VERSION }; + struct ffa_send_direct_data data = { + .data0 = OPTEE_FFA_GET_API_VERSION, + }; int rc; msg_ops->mode_32bit_set(ffa_dev); @@ -677,7 +679,9 @@ static bool optee_ffa_api_is_compatbile(struct ffa_device *ffa_dev, return false; } - data = (struct ffa_send_direct_data){ OPTEE_FFA_GET_OS_VERSION }; + data = (struct ffa_send_direct_data){ + .data0 = OPTEE_FFA_GET_OS_VERSION, + }; rc = msg_ops->sync_send_receive(ffa_dev, &data); if (rc) { pr_err("Unexpected error %d\n", rc); @@ -698,7 +702,9 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev, unsigned int *rpc_param_count, unsigned int *max_notif_value) { - struct ffa_send_direct_data data = { OPTEE_FFA_EXCHANGE_CAPABILITIES }; + struct ffa_send_direct_data data = { + .data0 = OPTEE_FFA_EXCHANGE_CAPABILITIES, + }; int rc; rc = ops->msg_ops->sync_send_receive(ffa_dev, &data); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 45f04a25255a3..1b2345a697c5a 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -759,6 +759,9 @@ static void power_allocator_manage(struct thermal_zone_device *tz) return; } + if (!params->trip_max) + return; + allocate_power(tz, params->trip_max->temperature); params->update_cdevs = true; } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 1b0ab27908604..ecc748d15eb7c 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -300,6 +300,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) thermal_zone_device_set_polling(tz, tz->passive_delay_jiffies); else if (tz->polling_delay_jiffies) thermal_zone_device_set_polling(tz, tz->polling_delay_jiffies); + else if (tz->temperature == THERMAL_TEMP_INVALID) + thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS)); } static struct thermal_governor *thermal_get_tz_governor(struct thermal_zone_device *tz) @@ -482,16 +484,14 @@ static void thermal_trip_crossed(struct thermal_zone_device *tz, thermal_governor_trip_crossed(governor, tz, trip, crossed_up); } -static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a, +static int thermal_trip_notify_cmp(void *not_used, const struct list_head *a, const struct list_head *b) { struct thermal_trip_desc *tda = container_of(a, struct thermal_trip_desc, notify_list_node); struct thermal_trip_desc *tdb = container_of(b, struct thermal_trip_desc, notify_list_node); - int ret = tdb->notify_temp - tda->notify_temp; - - 
return ascending ? ret : -ret; + return tda->notify_temp - tdb->notify_temp; } void __thermal_zone_device_update(struct thermal_zone_device *tz, @@ -511,7 +511,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, update_temperature(tz); if (tz->temperature == THERMAL_TEMP_INVALID) - return; + goto monitor; __thermal_zone_set_trips(tz); @@ -520,12 +520,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, for_each_trip_desc(tz, td) handle_thermal_trip(tz, td, &way_up_list, &way_down_list); - list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp); + list_sort(NULL, &way_up_list, thermal_trip_notify_cmp); list_for_each_entry(td, &way_up_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, true); list_sort(NULL, &way_down_list, thermal_trip_notify_cmp); - list_for_each_entry(td, &way_down_list, notify_list_node) + list_for_each_entry_reverse(td, &way_down_list, notify_list_node) thermal_trip_crossed(tz, &td->trip, governor, false); if (governor->manage) @@ -533,6 +533,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, thermal_debug_update_trip_stats(tz); +monitor: monitor_thermal_zone(tz); } diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 66f67e54e0c8d..94eeb4011a481 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -133,6 +133,12 @@ struct thermal_zone_device { struct thermal_trip_desc trips[] __counted_by(num_trips); }; +/* + * Default delay after a failing thermal zone temperature check before + * attempting to check it again. + */ +#define THERMAL_RECHECK_DELAY_MS 250 + /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) #define DEFAULT_THERMAL_GOVERNOR "step_wise" diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index ddac0a13cf840..1af9aed99c651 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -672,7 +672,8 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) * https://www.ti.com/lit/pdf/sprz536 */ if (priv->habit & UART_RX_TIMEOUT_QUIRK && - (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) { + (iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT && + serial_port_in(port, UART_OMAP_RX_LVL) == 0) { unsigned char efr2, timeout_h, timeout_l; efr2 = serial_in(up, UART_OMAP_EFR2); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index f4f40c9373c2f..ff32cd2d2863a 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -120,6 +120,7 @@ #define UCR4_OREN (1<<1) /* Receiver overrun interrupt enable */ #define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ #define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ +#define UFCR_RXTL_MASK 0x3F /* Receiver trigger 6 bits wide */ #define UFCR_DCEDTE (1<<6) /* DCE/DTE mode select */ #define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ #define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7) @@ -1551,6 +1552,7 @@ static void imx_uart_shutdown(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long flags; u32 ucr1, ucr2, ucr4, uts; + int loops; if (sport->dma_is_enabled) { dmaengine_terminate_sync(sport->dma_chan_tx); @@ -1613,6 +1615,56 @@ static void imx_uart_shutdown(struct uart_port *port) ucr4 &= ~UCR4_TCEN; imx_uart_writel(sport, ucr4, UCR4); + /* + * We have to ensure the tx state machine ends up in OFF. 
This + * is especially important for rs485 where we must not leave + * the RTS signal high, blocking the bus indefinitely. + * + * All interrupts are now disabled, so imx_uart_stop_tx() will + * no longer be called from imx_uart_transmit_buffer(). It may + * still be called via the hrtimers, and if those are in play, + * we have to honour the delays. + */ + if (sport->tx_state == WAIT_AFTER_RTS || sport->tx_state == SEND) + imx_uart_stop_tx(port); + + /* + * In many cases (rs232 mode, or if tx_state was + * WAIT_AFTER_RTS, or if tx_state was SEND and there is no + * delay_rts_after_send), this will have moved directly to + * OFF. In rs485 mode, tx_state might already have been + * WAIT_AFTER_SEND and the hrtimer thus already started, or + * the above imx_uart_stop_tx() call could have started it. In + * those cases, we have to wait for the hrtimer to fire and + * complete the transition to OFF. + */ + loops = port->rs485.flags & SER_RS485_ENABLED ? + port->rs485.delay_rts_after_send : 0; + while (sport->tx_state != OFF && loops--) { + uart_port_unlock_irqrestore(&sport->port, flags); + msleep(1); + uart_port_lock_irqsave(&sport->port, &flags); + } + + if (sport->tx_state != OFF) { + dev_warn(sport->port.dev, "unexpected tx_state %d\n", + sport->tx_state); + /* + * This machine may be busted, but ensure the RTS + * signal is inactive in order not to block other + * devices. + */ + if (port->rs485.flags & SER_RS485_ENABLED) { + ucr2 = imx_uart_readl(sport, UCR2); + if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) + imx_uart_rts_active(sport, &ucr2); + else + imx_uart_rts_inactive(sport, &ucr2); + imx_uart_writel(sport, ucr2, UCR2); + } + sport->tx_state = OFF; + } + uart_port_unlock_irqrestore(&sport->port, flags); clk_disable_unprepare(sport->clk_per); @@ -1933,7 +1985,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio struct serial_rs485 *rs485conf) { struct imx_port *sport = (struct imx_port *)port; - u32 ucr2; + u32 ucr2, ufcr; if (rs485conf->flags & SER_RS485_ENABLED) { /* Enable receiver if low-active RTS signal is requested */ @@ -1953,7 +2005,10 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || rs485conf->flags & SER_RS485_RX_DURING_TX) { - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + /* If the receiver trigger is 0, set it to a default value */ + ufcr = imx_uart_readl(sport, UFCR); + if ((ufcr & UFCR_RXTL_MASK) == 0) + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); imx_uart_start_rx(port); } diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c index 19f0a305cc430..3b4206e815fe9 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -688,12 +688,13 @@ static int ma35d1serial_probe(struct platform_device *pdev) struct uart_ma35d1_port *up; int ret = 0; - if (pdev->dev.of_node) { - ret = of_alias_get_id(pdev->dev.of_node, "serial"); - if (ret < 0) { - dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret); - return ret; - } + if (!pdev->dev.of_node) + return -ENODEV; + + ret = of_alias_get_id(pdev->dev.of_node, "serial"); + if (ret < 0) { + dev_err(&pdev->dev, "failed to get alias/pdev id, errno %d\n", ret); + return ret; } up = &ma35d1serial_ports[ret]; up->port.line = ret; diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 2bd25afe0d925..69a632fefc41f 
100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -649,15 +649,25 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport) static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport) { + unsigned char c; u32 irq_en; - if (qcom_geni_serial_main_active(uport) || - !qcom_geni_serial_tx_empty(uport)) - return; + /* + * Start a new transfer in case the previous command was cancelled and + * left data in the FIFO which may prevent the watermark interrupt + * from triggering. Note that the stale data is discarded. + */ + if (!qcom_geni_serial_main_active(uport) && + !qcom_geni_serial_tx_empty(uport)) { + if (uart_fifo_out(uport, &c, 1) == 1) { + writel(M_CMD_DONE_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); + qcom_geni_serial_setup_tx(uport, 1); + writel(c, uport->membase + SE_GENI_TX_FIFOn); + } + } irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN); irq_en |= M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN; - writel(DEF_TX_WM, uport->membase + SE_GENI_TX_WATERMARK_REG); writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN); } @@ -665,13 +675,17 @@ static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport) static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport) { u32 irq_en; - struct qcom_geni_serial_port *port = to_dev_port(uport); irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN); irq_en &= ~(M_CMD_DONE_EN | M_TX_FIFO_WATERMARK_EN); writel(0, uport->membase + SE_GENI_TX_WATERMARK_REG); writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN); - /* Possible stop tx is called multiple times. */ +} + +static void qcom_geni_serial_cancel_tx_cmd(struct uart_port *uport) +{ + struct qcom_geni_serial_port *port = to_dev_port(uport); + if (!qcom_geni_serial_main_active(uport)) return; @@ -684,6 +698,8 @@ static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport) writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); } writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR); + + port->tx_remaining = 0; } static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop) @@ -862,7 +878,7 @@ static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport, memset(buf, 0, sizeof(buf)); tx_bytes = min(remaining, BYTES_PER_FIFO_WORD); - tx_bytes = uart_fifo_out(uport, buf, tx_bytes); + uart_fifo_out(uport, buf, tx_bytes); iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1); @@ -890,13 +906,17 @@ static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport, else pending = kfifo_len(&tport->xmit_fifo); - /* All data has been transmitted and acknowledged as received */ - if (!pending && !status && done) { + /* All data has been transmitted or command has been cancelled */ + if (!pending && done) { qcom_geni_serial_stop_tx_fifo(uport); goto out_write_wakeup; } - avail = port->tx_fifo_depth - (status & TX_FIFO_WC); + if (active) + avail = port->tx_fifo_depth - (status & TX_FIFO_WC); + else + avail = port->tx_fifo_depth; + avail *= BYTES_PER_FIFO_WORD; chunk = min(avail, pending); @@ -1069,11 +1089,15 @@ static void qcom_geni_serial_shutdown(struct uart_port *uport) { disable_irq(uport->irq); - if (uart_console(uport)) - return; - qcom_geni_serial_stop_tx(uport); qcom_geni_serial_stop_rx(uport); + + qcom_geni_serial_cancel_tx_cmd(uport); +} + +static void qcom_geni_serial_flush_buffer(struct uart_port *uport) +{ + qcom_geni_serial_cancel_tx_cmd(uport); } static int qcom_geni_serial_port_setup(struct uart_port *uport) @@ -1532,6 +1556,7 @@ static const struct uart_ops qcom_geni_console_pops = { 
.request_port = qcom_geni_serial_request_port, .config_port = qcom_geni_serial_config_port, .shutdown = qcom_geni_serial_shutdown, + .flush_buffer = qcom_geni_serial_flush_buffer, .type = qcom_geni_serial_get_type, .set_mctrl = qcom_geni_serial_set_mctrl, .get_mctrl = qcom_geni_serial_get_mctrl, diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 8944548c30fa1..c532416aec229 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -105,16 +105,15 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac); * @hba: per adapter instance * @req: pointer to the request to be issued * - * Return: the hardware queue instance on which the request would - * be queued. + * Return: the hardware queue instance on which the request will be or has + * been queued. %NULL if the request has already been freed. */ struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, struct request *req) { - u32 utag = blk_mq_unique_tag(req); - u32 hwq = blk_mq_unique_tag_to_hwq(utag); + struct blk_mq_hw_ctx *hctx = READ_ONCE(req->mq_hctx); - return &hba->uhq[hwq]; + return hctx ? &hba->uhq[hctx->queue_num] : NULL; } /** @@ -515,6 +514,8 @@ int ufshcd_mcq_sq_cleanup(struct ufs_hba *hba, int task_tag) if (!cmd) return -EINVAL; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + return 0; } else { hwq = hba->dev_cmd_queue; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 1b65e6ae41375..46433ecf0c4dc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6456,6 +6456,8 @@ static bool ufshcd_abort_one(struct request *rq, void *priv) /* Release cmd in MCQ mode if abort succeeds */ if (is_mcq_enabled(hba) && (*ret == 0)) { hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); + if (!hwq) + return 0; spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 3362af165ef5a..880d52c0949d4 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -291,6 +291,20 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, if (ifp->desc.bNumEndpoints >= num_ep) goto skip_to_next_endpoint_or_interface_descriptor; + /* Save a copy of the descriptor and use it instead of the original */ + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; + memcpy(&endpoint->desc, d, n); + d = &endpoint->desc; + + /* Clear the reserved bits in bEndpointAddress */ + i = d->bEndpointAddress & + (USB_ENDPOINT_DIR_MASK | USB_ENDPOINT_NUMBER_MASK); + if (i != d->bEndpointAddress) { + dev_notice(ddev, "config %d interface %d altsetting %d has an endpoint descriptor with address 0x%X, changing to 0x%X\n", + cfgno, inum, asnum, d->bEndpointAddress, i); + endpoint->desc.bEndpointAddress = i; + } + /* Check for duplicate endpoint addresses */ if (config_endpoint_is_duplicate(config, inum, asnum, d)) { dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", @@ -308,10 +322,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, } } - endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; + /* Accept this endpoint */ ++ifp->desc.bNumEndpoints; - - memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); /* diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c index f1a499ee482c3..763e4122ed5b3 100644 --- a/drivers/usb/core/of.c +++ b/drivers/usb/core/of.c @@ -84,9 +84,12 @@ static bool usb_of_has_devices_or_graph(const struct usb_device 
*hub) if (of_graph_is_present(np)) return true; - for_each_child_of_node(np, child) - if (of_property_present(child, "reg")) + for_each_child_of_node(np, child) { + if (of_property_present(child, "reg")) { + of_node_put(child); return true; + } + } return false; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index b4783574b8e66..13171454f9591 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -506,6 +506,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1b1c, 0x1b38), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, + /* START BP-850k Printer */ + { USB_DEVICE(0x1bc3, 0x0003), .driver_info = USB_QUIRK_NO_SET_INTF }, + /* MIDI keyboard WORLDE MINI */ { USB_DEVICE(0x1c75, 0x0204), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 9ef821ca2fc71..052852f801467 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -54,6 +54,10 @@ #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e #define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 +#define PCI_DEVICE_ID_INTEL_PTLH 0xe332 +#define PCI_DEVICE_ID_INTEL_PTLH_PCH 0xe37e +#define PCI_DEVICE_ID_INTEL_PTLU 0xe432 +#define PCI_DEVICE_ID_INTEL_PTLU_PCH 0xe47e #define PCI_DEVICE_ID_AMD_MR 0x163a #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" @@ -430,6 +434,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLH_PCH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLU, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, PTLU_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) }, { PCI_DEVICE_DATA(AMD, MR, &dwc3_pci_amd_mr_swnode) }, diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index ce3cfa1f36f51..0e7c1e947c0a0 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -115,9 +115,12 @@ static int usb_string_copy(const char *s, char **s_copy) int ret; char *str; char *copy = *s_copy; + ret = strlen(s); if (ret > USB_MAX_STRING_LEN) return -EOVERFLOW; + if (ret < 1) + return -EINVAL; if (copy) { str = copy; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 37eb37b0affa9..0a8cf6c17f827 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1125,10 +1125,20 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) xhci_dbg(xhci, "Start the secondary HCD\n"); retval = xhci_run(xhci->shared_hcd); } - + if (retval) + return retval; + /* + * Resume roothubs unconditionally as PORTSC change bits are not + * immediately visible after xHC reset + */ hcd->state = HC_STATE_SUSPENDED; - if (xhci->shared_hcd) + + if (xhci->shared_hcd) { xhci->shared_hcd->state = HC_STATE_SUSPENDED; + usb_hcd_resume_root_hub(xhci->shared_hcd); + } + usb_hcd_resume_root_hub(hcd); + goto done; } @@ -1152,7 +1162,6 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) xhci_dbc_resume(xhci); - done: if (retval == 0) { /* * Resume roothubs only if there are pending events. 
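For context on the drivers/usb/core/of.c hunk above: for_each_child_of_node() holds a reference on the child node for the current iteration and releases it when advancing, so returning out of the loop without an explicit of_node_put() leaks that reference. A minimal sketch of the pattern under the same assumptions (the helper name is hypothetical, not part of the patch):

	#include <linux/of.h>

	/* Hypothetical helper showing the early-exit refcount rule. */
	static bool example_has_child_with_reg(const struct device_node *np)
	{
		struct device_node *child;

		for_each_child_of_node(np, child) {
			if (of_property_present(child, "reg")) {
				/* Drop the reference the iterator holds on 'child'. */
				of_node_put(child);
				return true;
			}
		}
		/* Normal termination has already dropped every reference. */
		return false;
	}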
@@ -1178,6 +1187,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) usb_hcd_resume_root_hub(hcd); } } +done: /* * If system is subject to the Quirk, Compliance Mode Timer needs to * be re-initialized Always after a system resume. Ports are subject diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 8b0308d84270f..85697466b1476 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1737,6 +1737,49 @@ static void mos7840_port_remove(struct usb_serial_port *port) kfree(mos7840_port); } +static int mos7840_suspend(struct usb_serial *serial, pm_message_t message) +{ + struct moschip_port *mos7840_port; + struct usb_serial_port *port; + int i; + + for (i = 0; i < serial->num_ports; ++i) { + port = serial->port[i]; + if (!tty_port_initialized(&port->port)) + continue; + + mos7840_port = usb_get_serial_port_data(port); + + usb_kill_urb(mos7840_port->read_urb); + mos7840_port->read_urb_busy = false; + } + + return 0; +} + +static int mos7840_resume(struct usb_serial *serial) +{ + struct moschip_port *mos7840_port; + struct usb_serial_port *port; + int res; + int i; + + for (i = 0; i < serial->num_ports; ++i) { + port = serial->port[i]; + if (!tty_port_initialized(&port->port)) + continue; + + mos7840_port = usb_get_serial_port_data(port); + + mos7840_port->read_urb_busy = true; + res = usb_submit_urb(mos7840_port->read_urb, GFP_NOIO); + if (res) + mos7840_port->read_urb_busy = false; + } + + return 0; +} + static struct usb_serial_driver moschip7840_4port_device = { .driver = { .owner = THIS_MODULE, @@ -1764,6 +1807,8 @@ static struct usb_serial_driver moschip7840_4port_device = { .port_probe = mos7840_port_probe, .port_remove = mos7840_port_remove, .read_bulk_callback = mos7840_bulk_in_callback, + .suspend = mos7840_suspend, + .resume = mos7840_resume, }; static struct usb_serial_driver * const serial_drivers[] = { diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8a5846d4adf67..311040f9b9352 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1425,6 +1425,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3000, 0xff), /* Telit FN912 */ + .driver_info = RSVD(0) | NCTRL(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x3001, 0xff), /* Telit FN912 */ + .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */ .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ @@ -1433,6 +1437,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x9000, 0xff), /* Telit generic core-dump device */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ @@ -2224,6 +2230,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_7106_2COM, 0x02, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 
0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7126, 0xff, 0x00, 0x00), + .driver_info = NCTRL(2) }, + { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, 0x7127, 0xff, 0x00, 0x00), + .driver_info = NCTRL(2) | NCTRL(3) | NCTRL(4) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MPL200), .driver_info = RSVD(1) | RSVD(4) }, @@ -2284,6 +2294,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0f0, 0xff), /* Foxconn T99W373 MBIM */ .driver_info = RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe145, 0xff), /* Foxconn T99W651 RNDIS */ + .driver_info = RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 (IOT version) */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ @@ -2321,6 +2333,32 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */ .driver_info = RSVD(5) }, + { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0802, 0xff), /* Rolling RW350-GL (laptop MBIM) */ + .driver_info = RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0100, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for Global SKU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0101, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WRD for China SKU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0106, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for SA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0111, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for EU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0112, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for NA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0113, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for China EDU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0115, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x30) }, /* NetPrisma LCUK54-WWD for Global EDU */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x3731, 0x0116, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, {
USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 987c7921affa6..ba0ce0075b2fb 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1260,7 +1260,7 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( struct vfio_pci_hot_reset_info hdr; struct vfio_pci_fill_info fill = {}; bool slot = false; - int ret, count; + int ret, count = 0; if (copy_from_user(&hdr, arg, minsz)) return -EFAULT; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 15bb7989c387a..3acf5e0500728 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -512,7 +512,7 @@ static int afs_iget5_set_root(struct inode *inode, void *opaque) struct afs_vnode *vnode = AFS_FS_I(inode); vnode->volume = as->volume; - vnode->fid.vid = as->volume->vid, + vnode->fid.vid = as->volume->vid; vnode->fid.vnode = 1; vnode->fid.unique = 1; inode->i_ino = 1; @@ -545,7 +545,7 @@ struct inode *afs_root_iget(struct super_block *sb, struct key *key) BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); - vnode->cb_v_check = atomic_read(&as->volume->cb_v_break), + vnode->cb_v_check = atomic_read(&as->volume->cb_v_break); afs_set_netfs_context(vnode); op = afs_alloc_operation(key, as->volume); diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 1de9fac3bcf4f..658f11aebda1f 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -3,6 +3,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" +#include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_key_cache.h" @@ -1553,13 +1554,13 @@ int bch2_check_alloc_info(struct bch_fs *c) } static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, - struct btree_iter *alloc_iter) + struct btree_iter *alloc_iter, + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct btree_iter lru_iter; struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; - struct bkey_s_c alloc_k, lru_k; + struct bkey_s_c alloc_k; struct printbuf buf = PRINTBUF; int ret; @@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = bch2_alloc_to_v4(alloc_k, &a_convert); + if (a->fragmentation_lru) { + ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, + a->fragmentation_lru, + alloc_k, last_flushed); + if (ret) + return ret; + } + if (a->data_type != BCH_DATA_cached) return 0; @@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, a = &a_mut->v; } - lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, - lru_pos(alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]), 0); - ret = bkey_err(lru_k); + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], + alloc_k, last_flushed); if (ret) - return ret; - - if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c, - alloc_key_to_missing_lru_entry, - "missing lru entry\n" - " %s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - ret = bch2_lru_set(trans, - alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ]); - if (ret) - goto err; - } + goto err; err: fsck_err: - bch2_trans_iter_exit(trans, &lru_iter); printbuf_exit(&buf); return ret; } int bch2_check_alloc_to_lru_refs(struct bch_fs *c) { + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, 
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_alloc_to_lru_ref(trans, &iter))); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 9d3d64746a5be..27d97c22ae272 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 24); percpu_down_read(&c->mark_lock); @@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++) nr[c->open_buckets[i].data_type]++; + printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 12); printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 4321f9fb73bd9..6d8b1bc90be0c 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c) return ret; } -static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) -{ - return bpos_eq(l.k->p, r.k->p) && - bkey_bytes(l.k) == bkey_bytes(r.k) && - !memcmp(l.v, r.v, bkey_val_bytes(l.k)); -} - struct extents_to_bp_state { struct bpos bucket_start; struct bpos bucket_end; @@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; struct printbuf buf = PRINTBUF; struct bkey_s_c bp_k; - struct bkey_buf tmp; int ret = 0; - bch2_bkey_buf_init(&tmp); - struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); if (!ca) { prt_str(&buf, "extent for nonexistent device:bucket "); @@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans, if (bp_k.k->type != KEY_TYPE_backpointer || memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { - bch2_bkey_buf_reassemble(&tmp, c, orig_k); - - if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { - if (bp.level) { - bch2_trans_unlock(trans); - bch2_btree_interior_updates_flush(c); - } - - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - - bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); - ret = -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); + if (ret) + goto err; goto check_existing_bp; } @@ -589,7 +566,6 @@ static int check_bp_exists(struct btree_trans *trans, fsck_err: bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); - bch2_bkey_buf_exit(&tmp, c); bch2_dev_put(ca); printbuf_exit(&buf); return ret; @@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, !((1U << btree) & btree_interior_mask)) continue; + bch2_trans_begin(trans); + __for_each_btree_node(trans, iter, btree, btree == start.btree ? 
start.pos : POS_MIN, 0, depth, BTREE_ITER_prefetch, b, ret) { @@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, struct bkey_s_c_backpointer bp, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans, if (ret) return ret; - if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { - *last_flushed_pos = bp.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + if (!k.k) { + ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); + if (ret) + goto out; - if (fsck_err_on(!k.k, c, - backpointer_to_missing_ptr, - "backpointer for missing %s\n %s", - bp.v->level ? "btree node" : "extent", - (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { - ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); - goto out; + if (fsck_err(c, backpointer_to_missing_ptr, + "backpointer for missing %s\n %s", + bp.v->level ? "btree node" : "extent", + (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { + ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); + goto out; + } } out: fsck_err: @@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, struct bbpos start, struct bbpos end) { - struct bpos last_flushed_pos = SPOS_MAX; + struct bkey_buf last_flushed; - return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), - &last_flushed_pos)); + &last_flushed)); + + bch2_bkey_buf_exit(&last_flushed, trans->c); + return ret; } int bch2_check_backpointers_to_extents(struct bch_fs *c) diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 94a1d1982fa88..587d7318a2e81 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c, bch2_bkey_format_field_overflows(f, i)) { unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1)); - u64 packed_max = f->bits_per_field[i] - ? ~((~0ULL << 1) << (f->bits_per_field[i] - 1)) + unsigned packed_bits = min(64, f->bits_per_field[i]); + u64 packed_max = packed_bits + ? ~((~0ULL << 1) << (packed_bits - 1)) : 0; prt_printf(err, "field %u too large: %llu + %llu > %llu", diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index fcd43915df079..936357149cf0f 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r) return bkey_gt(l, r) ? 
l : r; } +static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) +{ + return bpos_eq(l.k->p, r.k->p) && + bkey_bytes(l.k) == bkey_bytes(r.k) && + !memcmp(l.v, r.v, bkey_val_bytes(l.k)); +} + void bch2_bpos_swab(struct bpos *); void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0e477a926579a..a0deb8266011d 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -641,16 +641,30 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in target_depth = 0; /* root */ - mutex_lock(&c->btree_root_lock); - struct btree *b = bch2_btree_id_root(c, btree)->b; - if (!btree_node_fake(b)) { + do { +retry_root: + bch2_trans_begin(trans); + + struct btree_iter iter; + bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, + 0, bch2_btree_id_root(c, btree)->b->c.level, 0); + struct btree *b = bch2_btree_iter_peek_node(&iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto err_root; + + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX)); - ret = lockrestart_do(trans, - bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, - NULL, NULL, bkey_i_to_s_c(&b->key), initial)); + struct bkey_s_c k = bkey_i_to_s_c(&b->key); + ret = bch2_gc_mark_key(trans, btree, b->c.level + 1, NULL, NULL, k, initial); level = b->c.level; - } - mutex_unlock(&c->btree_root_lock); +err_root: + bch2_trans_iter_exit(trans, &iter); + } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); if (ret) return ret; @@ -903,6 +917,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans, bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true); percpu_up_read(&c->mark_lock); + gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); + if (fsck_err_on(new.data_type != gc.data_type, c, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -916,23 +932,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans, #define copy_bucket_field(_errtype, _f) \ if (fsck_err_on(new._f != gc._f, c, _errtype, \ "bucket %llu:%llu gen %u data type %s has wrong " #_f \ - ": got %u, should be %u", \ + ": got %llu, should be %llu", \ iter->pos.inode, iter->pos.offset, \ gc.gen, \ bch2_data_type_str(gc.data_type), \ - new._f, gc._f)) \ + (u64) new._f, (u64) gc._f)) \ new._f = gc._f; \ - copy_bucket_field(alloc_key_gen_wrong, - gen); - copy_bucket_field(alloc_key_dirty_sectors_wrong, - dirty_sectors); - copy_bucket_field(alloc_key_cached_sectors_wrong, - cached_sectors); - copy_bucket_field(alloc_key_stripe_wrong, - stripe); - copy_bucket_field(alloc_key_stripe_redundancy_wrong, - stripe_redundancy); + copy_bucket_field(alloc_key_gen_wrong, gen); + copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors); + copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); + copy_bucket_field(alloc_key_stripe_wrong, stripe); + copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); + copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) @@ -946,7 +958,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans, a->v = new; /* - * The trigger normally makes sure this is set, but we're not running + * The trigger normally makes sure these are set, but we're not running * triggers: */ if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ]) diff --git 
a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 0ed9e6574fcd0..19352a08ea204 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -996,7 +996,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) bch2_trans_unlock(trans); cond_resched(); - trans->locked = true; + trans_set_locked(trans); if (unlikely(trans->memory_allocation_failure)) { struct closure cl; @@ -3089,7 +3089,8 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_srcu_unlock(trans); trans->last_begin_ip = _RET_IP_; - trans->locked = true; + + trans_set_locked(trans); if (trans->restarted) { bch2_btree_path_traverse_all(trans); @@ -3159,7 +3160,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->last_begin_time = local_clock(); trans->fn_idx = fn_idx; trans->locking_wait.task = current; - trans->locked = true; trans->journal_replay_not_finished = unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) && atomic_inc_not_zero(&c->journal_keys.ref); @@ -3193,6 +3193,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; + trans_set_locked(trans); closure_init_stack_release(&trans->ref); return trans; diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index d66fff22109ae..c51826fd557f5 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_newline(&buf); } - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } @@ -792,7 +792,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) return bch2_trans_relock_fail(trans, path, &f, trace); } - trans->locked = true; + trans_set_locked(trans); out: bch2_trans_verify_locks(trans); return 0; @@ -812,16 +812,14 @@ void bch2_trans_unlock_noassert(struct btree_trans *trans) { __bch2_trans_unlock(trans); - trans->locked = false; - trans->last_unlock_ip = _RET_IP_; + trans_set_unlocked(trans); } void bch2_trans_unlock(struct btree_trans *trans) { __bch2_trans_unlock(trans); - trans->locked = false; - trans->last_unlock_ip = _RET_IP_; + trans_set_unlocked(trans); } void bch2_trans_unlock_long(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 7f41545b9147f..75a6274c7d272 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -193,6 +193,28 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); /* lock: */ +static inline void trans_set_locked(struct btree_trans *trans) +{ + if (!trans->locked) { + trans->locked = true; + trans->last_unlock_ip = 0; + + trans->pf_memalloc_nofs = (current->flags & PF_MEMALLOC_NOFS) != 0; + current->flags |= PF_MEMALLOC_NOFS; + } +} + +static inline void trans_set_unlocked(struct btree_trans *trans) +{ + if (trans->locked) { + trans->locked = false; + trans->last_unlock_ip = _RET_IP_; + + if (!trans->pf_memalloc_nofs) + current->flags &= ~PF_MEMALLOC_NOFS; + } +} + static inline int __btree_node_lock_nopath(struct btree_trans *trans, struct btree_bkey_cached_common *b, enum six_lock_type type, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 87f485e9c552d..48cb1a7d31c51 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -484,6 +484,7 @@ struct btree_trans { 
bool lock_may_not_fail:1; bool srcu_held:1; bool locked:1; + bool pf_memalloc_nofs:1; bool write_locked:1; bool used_mempool:1; bool in_traverse_all:1; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 75c8a196b3f63..d0e92d948002d 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "bkey_buf.h" #include "btree_locking.h" #include "btree_update.h" #include "btree_update_interior.h" #include "btree_write_buffer.h" #include "error.h" +#include "extents.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" @@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans) return ret; } +/** + * In check and repair code, when checking references to write buffer btrees we + * need to issue a flush before we have a definitive error: this issues a flush + * if this is a key we haven't yet checked. + */ +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct bkey_buf tmp; + int ret = 0; + + bch2_bkey_buf_init(&tmp); + + if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { + bch2_bkey_buf_reassemble(&tmp, c, referring_k); + + if (bkey_is_btree_ptr(referring_k.k)) { + bch2_trans_unlock(trans); + bch2_btree_interior_updates_flush(c); + } + + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + + bch2_bkey_buf_copy(last_flushed, c, tmp.k); + ret = -BCH_ERR_transaction_restart_write_buffer_flush; + } +err: + bch2_bkey_buf_exit(&tmp, c); + return ret; +} + static void bch2_btree_write_buffer_flush_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work); diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index eebcd2b15249a..dd5e64218b50f 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_tryflush(struct btree_trans *); +struct bkey_buf; +int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *); + struct journal_keys_to_wb { struct btree_write_buffer_keys *wb; size_t room; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 743d57eba7607..314ee3e0187f4 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -805,7 +805,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - *bucket_gen(ca, bucket_nr), + bucket_gen_get(ca, bucket_nr), bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, (printbuf_reset(&buf), diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 80ee0be9793e6..8ad4be73860cc 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) return gens->b + b; } +static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b) +{ + rcu_read_lock(); + u8 gen = *bucket_gen(ca, b); + rcu_read_unlock(); + return gen; +} + static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { diff --git a/fs/bcachefs/clock.c 
b/fs/bcachefs/clock.c index 3636444511064..0f40b585ce2b5 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock, { struct io_timer *ret = NULL; - spin_lock(&clock->timer_lock); - if (clock->timers.used && time_after_eq(now, clock->timers.data[0]->expire)) heap_pop(&clock->timers, ret, io_timer_cmp, NULL); - - spin_unlock(&clock->timer_lock); - return ret; } @@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors) struct io_timer *timer; unsigned long now = atomic64_add_return(sectors, &clock->now); + spin_lock(&clock->timer_lock); while ((timer = get_expired_timer(clock, now))) timer->fn(timer); + spin_unlock(&clock->timer_lock); } void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 1a0072eef109b..0087b8555ead3 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -5,7 +5,9 @@ #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" +#include "compress.h" #include "data_update.h" +#include "disk_groups.h" #include "ec.h" #include "error.h" #include "extents.h" @@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans, } } +void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) +{ + printbuf_tabstop_push(out, 20); + prt_str(out, "rewrite ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); + prt_newline(out); + + prt_str(out, "kill ptrs:\t"); + bch2_prt_u64_base2(out, data_opts->kill_ptrs); + prt_newline(out); + + prt_str(out, "target:\t"); + bch2_target_to_text(out, c, data_opts->target); + prt_newline(out); + + prt_str(out, "compression:\t"); + bch2_compression_opt_to_text(out, background_compression(*io_opts)); + prt_newline(out); + + prt_str(out, "extra replicas:\t"); + prt_u64(out, data_opts->extra_replicas); +} + +void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) +{ + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + prt_newline(out); + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); +} + int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); + if (!m->op.nr_replicas) { + struct printbuf buf = PRINTBUF; + + bch2_data_update_to_text(&buf, m); + WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); + printbuf_exit(&buf); + ret = -BCH_ERR_data_update_done; + goto done; + } + m->op.nr_replicas_required = m->op.nr_replicas; if (reserve_sectors) { diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 991095bbd469b..8d36365bdea8a 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -17,6 +17,9 @@ struct data_update_opts { unsigned write_flags; }; +void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, + struct bch_io_opts *, struct data_update_opts *); + struct data_update { /* extent being updated: */ enum btree_id btree_id; @@ -27,6 +30,8 @@ struct data_update { struct bch_write_op op; }; +void bch2_data_update_to_text(struct printbuf *, struct data_update *); + int bch2_data_update_index_update(struct bch_write_op *); void bch2_data_update_read_done(struct data_update 
*, diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index f0d4727c4dc29..ebabab171fe5e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -610,7 +610,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, list_sort(&c->btree_trans_list, list_ptr_order_cmp); list_for_each_entry(trans, &c->btree_trans_list, list) { - if ((ulong) trans < i->iter) + if ((ulong) trans <= i->iter) continue; i->iter = (ulong) trans; @@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = { static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_trans *trans; - pid_t iter = 0; + ulong iter = 0; restart: seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans <= iter) continue; - iter = task->pid; + iter = (ulong) trans; if (!closure_get_not_zero(&trans->ref)) continue; diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h index 24840aee335c0..795f4fc0bab17 100644 --- a/fs/bcachefs/eytzinger.h +++ b/fs/bcachefs/eytzinger.h @@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i) static inline unsigned eytzinger1_first(unsigned size) { - return rounddown_pow_of_two(size); + return size ? rounddown_pow_of_two(size) : 0; } static inline unsigned eytzinger1_last(unsigned size) @@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size) static inline unsigned eytzinger1_extra(unsigned size) { - return (size + 1 - rounddown_pow_of_two(size)) << 1; + return size + ? (size + 1 - rounddown_pow_of_two(size)) << 1 + : 0; } static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f9c9a95d7d4ca..fa1fee05cf8f5 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * discard_new_inode() expects it to be set... */ inode->v.i_flags |= I_NEW; + /* + * We don't want bch2_evict_inode() to delete the inode on disk, + * we just raced and had another inode in cache. Normally new + * inodes don't have nlink == 0 - except tmpfiles do... 
+ */ + set_nlink(&inode->v, 1); discard_new_inode(&inode->v); inode = old; } else { @@ -2026,6 +2032,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, __bch2_fs_stop(c); deactivate_locked_super(sb); err: + if (ret) + pr_err("error: %s", bch2_err_str(ret)); /* * On an inconsistency error in recovery we might see an -EROFS derived * errorcode (from the journal), but we don't want to return that to diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 4ec979b4b23e4..4583c9386e8c1 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -125,7 +125,7 @@ int bch2_extent_fallocate(struct btree_trans *trans, bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { - bch2_trans_unlock(trans); + bch2_trans_unlock_long(trans); closure_sync(&cl); } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index c97fa7002b06e..ebf39ef72fb2c 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -389,7 +389,6 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(trans); if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, @@ -1004,6 +1003,9 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, rbio->promote = promote; INIT_WORK(&rbio->work, NULL); + if (flags & BCH_READ_NODECODE) + orig->pick = pick; + rbio->bio.bi_opf = orig->bio.bi_opf; rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 13669dd0e3756..10b19791ec98e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1095,7 +1095,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, return ret; } -int bch2_dev_journal_alloc(struct bch_dev *ca) +int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) { unsigned nr; int ret; @@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL); + ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL); err: bch_err_fn(ca, ret); return ret; @@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c) if (ca->journal.nr) continue; - int ret = bch2_dev_journal_alloc(ca); + int ret = bch2_dev_journal_alloc(ca, true); if (ret) { percpu_ref_put(&ca->io_ref); return ret; @@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j) journal_quiesce(j); cancel_delayed_work_sync(&j->write_work); - BUG_ON(!bch2_journal_error(j) && - test_bit(JOURNAL_replay_done, &j->flags) && - j->last_empty_seq != journal_cur_seq(j)); + WARN(!bch2_journal_error(j) && + test_bit(JOURNAL_replay_done, &j->flags) && + j->last_empty_seq != journal_cur_seq(j), + "journal shutdown error: cur seq %llu but last empty seq %llu", + journal_cur_seq(j), j->last_empty_seq); if (!bch2_journal_error(j)) clear_bit(JOURNAL_running, &j->flags); @@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) unsigned long now = jiffies; u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes; - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 28); + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 28); out->atomic++; rcu_read_lock(); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index fd1f7cdaa8bc6..bc6b9c39dcb4c 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct 
printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); -int bch2_dev_journal_alloc(struct bch_dev *); +int bch2_dev_journal_alloc(struct bch_dev *, bool); int bch2_fs_journal_alloc(struct bch_fs *); void bch2_dev_journal_stop(struct journal *, struct bch_dev *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index db24ce21b2acf..2326e2cb9cd2e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c, flags|BCH_VALIDATE_journal); if (ret == FSCK_DELETED_KEY) continue; + else if (ret) + return ret; k = bkey_next(k); } @@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush) if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { spin_lock(&j->lock); - closure_wait(&j->async_wait, cl); + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + continue_at(cl, journal_write_preflush, j->wq); + return; + } spin_unlock(&j->lock); - - continue_at(cl, journal_write_preflush, j->wq); - return; } if (w->separate_flush) { diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index a40d116224edd..b12894ef44f30 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = { NULL }; +int bch2_lru_check_set(struct btree_trans *trans, + u16 lru_id, u64 time, + struct bkey_s_c referring_k, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + struct btree_iter lru_iter; + struct bkey_s_c lru_k = + bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, + lru_pos(lru_id, + bucket_to_u64(referring_k.k->p), + time), 0); + int ret = bkey_err(lru_k); + if (ret) + return ret; + + if (lru_k.k->type != KEY_TYPE_set) { + ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed); + if (ret) + goto err; + + if (fsck_err(c, alloc_key_to_missing_lru_entry, + "missing %s lru entry\n" + " %s", + bch2_lru_types[lru_type(lru_k)], + (bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) { + ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time); + if (ret) + goto err; + } + } +err: +fsck_err: + bch2_trans_iter_exit(trans, &lru_iter); + printbuf_exit(&buf); + return ret; +} + static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index bd71ba77de078..ed75bcf59d478 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64); int bch2_lru_set(struct btree_trans *, u16, u64, u64); int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); +struct bkey_buf; +int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *); + int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 6e477fadaa2a5..e714e3bd5bbbb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = { NULL }; -static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, - struct bch_io_opts *io_opts, - struct data_update_opts *data_opts) -{ - printbuf_tabstop_push(out, 20); - prt_str(out, "rewrite ptrs:\t"); - bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); - prt_newline(out); - - prt_str(out, "kill ptrs:\t"); - bch2_prt_u64_base2(out, 
data_opts->kill_ptrs); - prt_newline(out); - - prt_str(out, "target:\t"); - bch2_target_to_text(out, c, data_opts->target); - prt_newline(out); - - prt_str(out, "compression:\t"); - bch2_compression_opt_to_text(out, background_compression(*io_opts)); - prt_newline(out); - - prt_str(out, "extra replicas:\t"); - prt_u64(out, data_opts->extra_replicas); -} - static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d6f35a99c4291..d54121ec093fe 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -286,7 +286,8 @@ enum bch_fsck_flags { x(accounting_mismatch, 272, 0) \ x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ - x(alloc_key_io_time_bad, 275, 0) + x(alloc_key_io_time_bad, 275, 0) \ + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index fb906467201e9..da735608d47c8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c) BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); - EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved)); - free_percpu(c->online_reserved); + if (c->online_reserved) { + u64 v = percpu_u64_get(c->online_reserved); + WARN(v, "online_reserved not 0 at shutdown: %lli", v); + free_percpu(c->online_reserved); + } darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); @@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ret) goto err; - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, true); bch_err_msg(c, ret, "allocating journal"); if (ret) goto err; @@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path) } if (!ca->journal.nr) { - ret = bch2_dev_journal_alloc(ca); + ret = bch2_dev_journal_alloc(ca, false); bch_err_msg(ca, ret, "allocating journal"); if (ret) goto err; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index de331dec2a99c..4ec7e44d6e36c 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } -void bch2_print_string_as_lines(const char *prefix, const char *lines) +static void __bch2_print_string_as_lines(const char *prefix, const char *lines, + bool nonblocking) { + bool locked = false; const char *p; if (!lines) { @@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) return; } - console_lock(); + if (!nonblocking) { + console_lock(); + locked = true; + } else { + locked = console_trylock(); + } + while (1) { p = strchrnul(lines, '\n'); printk("%s%.*s\n", prefix, (int) (p - lines), lines); @@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) break; lines = p + 1; } - console_unlock(); + if (locked) + console_unlock(); +} + +void bch2_print_string_as_lines(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, false); +} + +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, true); } int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, diff --git 
a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 5d2c470a49ac9..5b0533ec4c7e1 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1a66be33bb048..60066822b5329 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1924,8 +1924,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) next: if (ret) { /* Refcount held by the reclaim_bgs list after splice. */ - btrfs_get_block_group(bg); - list_add_tail(&bg->bg_list, &retry_list); + spin_lock(&fs_info->unused_bgs_lock); + /* + * This block group might be added to the unused list + * during the above process. Move it back to the + * reclaim list otherwise. + */ + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, &retry_list); + } + spin_unlock(&fs_info->unused_bgs_lock); } btrfs_put_block_group(bg); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38cdb8875e8e8..cabb558dbdaa8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2856,6 +2856,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block if (ret) return ret; + spin_lock_init(&fs_info->extent_map_shrinker_lock); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f688fab55251e..958155cc43a81 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3553,7 +3553,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, for (int i = 0; i < num_folios; i++) { if (eb->folios[i]) { detach_extent_buffer_folio(eb, eb->folios[i]); - __folio_put(eb->folios[i]); + folio_put(eb->folios[i]); } } __free_extent_buffer(eb); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 744e8952abb04..b4c9a6aa118cd 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1028,7 +1028,14 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, return ret; } -static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan) +struct btrfs_em_shrink_ctx { + long nr_to_scan; + long scanned; + u64 last_ino; + u64 last_root; +}; + +static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx) { const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info); struct extent_map_tree *tree = &inode->extent_tree; @@ -1057,14 +1064,25 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t if (!down_read_trylock(&inode->i_mmap_lock)) return 0; - write_lock(&tree->lock); + /* + * We want to be fast because we can be called from any path trying to + * allocate memory, so if the lock is busy we don't want to spend time + * waiting for it - either some task is about to do IO for the inode or + * we may have another task shrinking extent maps, here in this code, so + * skip this inode. 
+ */ + if (!write_trylock(&tree->lock)) { + up_read(&inode->i_mmap_lock); + return 0; + } + node = rb_first_cached(&tree->map); while (node) { struct extent_map *em; em = rb_entry(node, struct extent_map, rb_node); node = rb_next(node); - (*scanned)++; + ctx->scanned++; if (em->flags & EXTENT_FLAG_PINNED) goto next; @@ -1085,16 +1103,18 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t free_extent_map(em); nr_dropped++; next: - if (*scanned >= nr_to_scan) + if (ctx->scanned >= ctx->nr_to_scan) break; /* - * Restart if we had to reschedule, and any extent maps that were - * pinned before may have become unpinned after we released the - * lock and took it again. + * Stop if we need to reschedule or there's contention on the + * lock. This is to avoid slowing other tasks trying to take the + * lock and because the shrinker might be called during a memory + * allocation path and we want to avoid taking a very long time + * and slowing down all sorts of tasks. */ - if (cond_resched_rwlock_write(&tree->lock)) - node = rb_first_cached(&tree->map); + if (need_resched() || rwlock_needbreak(&tree->lock)) + break; } write_unlock(&tree->lock); up_read(&inode->i_mmap_lock); @@ -1102,25 +1122,30 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t return nr_dropped; } -static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan) +static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_inode *inode; long nr_dropped = 0; - u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1; + u64 min_ino = ctx->last_ino + 1; inode = btrfs_find_first_inode(root, min_ino); while (inode) { - nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan); + nr_dropped += btrfs_scan_inode(inode, ctx); min_ino = btrfs_ino(inode) + 1; - fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode); - iput(&inode->vfs_inode); + ctx->last_ino = btrfs_ino(inode); + btrfs_add_delayed_iput(inode); - if (*scanned >= nr_to_scan) + if (ctx->scanned >= ctx->nr_to_scan) + break; + + /* + * We may be called from memory allocation paths, so we don't + * want to take too much time and slow down tasks. + */ + if (need_resched()) break; - cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1132,14 +1157,14 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s * inode if there is one or we will find out this was the last * one and move to the next root. */ - fs_info->extent_map_shrinker_last_root = btrfs_root_id(root); + ctx->last_root = btrfs_root_id(root); } else { /* * No more inodes in this root, set extent_map_shrinker_last_ino to 0 so * that when processing the next root we start from its first inode.
*/ - fs_info->extent_map_shrinker_last_ino = 0; - fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1; + ctx->last_ino = 0; + ctx->last_root = btrfs_root_id(root) + 1; } return nr_dropped; @@ -1147,19 +1172,41 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) { - const u64 start_root_id = fs_info->extent_map_shrinker_last_root; - u64 next_root_id = start_root_id; + struct btrfs_em_shrink_ctx ctx; + u64 start_root_id; + u64 next_root_id; bool cycled = false; long nr_dropped = 0; - long scanned = 0; + + ctx.scanned = 0; + ctx.nr_to_scan = nr_to_scan; + + /* + * In case we have multiple tasks running this shrinker, make the next + * one start from the next inode in case it starts before we finish. + */ + spin_lock(&fs_info->extent_map_shrinker_lock); + ctx.last_ino = fs_info->extent_map_shrinker_last_ino; + fs_info->extent_map_shrinker_last_ino++; + ctx.last_root = fs_info->extent_map_shrinker_last_root; + spin_unlock(&fs_info->extent_map_shrinker_lock); + + start_root_id = ctx.last_root; + next_root_id = ctx.last_root; if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) { s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps); - trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr); + trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, + nr, ctx.last_root, + ctx.last_ino); } - while (scanned < nr_to_scan) { + /* + * We may be called from memory allocation paths, so we don't want to + * take too much time and slow down tasks, so stop if we need to reschedule. + */ + while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { struct btrfs_root *root; unsigned long count; @@ -1171,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) spin_unlock(&fs_info->fs_roots_radix_lock); if (start_root_id > 0 && !cycled) { next_root_id = 0; - fs_info->extent_map_shrinker_last_root = 0; - fs_info->extent_map_shrinker_last_ino = 0; + ctx.last_root = 0; + ctx.last_ino = 0; cycled = true; continue; } @@ -1186,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) continue; if (is_fstree(btrfs_root_id(root))) - nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan); + nr_dropped += btrfs_scan_root(root, &ctx); btrfs_put_root(root); } + /* + * In case of multiple tasks running this extent map shrinking code, this + * isn't perfect but it's simple and silences things like KCSAN. It's + * not possible to know which task made more progress because we can + * cycle back to the first root and first inode if it's not the first + * time the shrinker ran, see the above logic. Also a task that started + * later may finish earlier than another task while having made less progress. So + * make this simple and update to the progress of the last task that + * finished, with the occasional possibility of having two consecutive + * runs of the shrinker process the same inodes.
+ */ + spin_lock(&fs_info->extent_map_shrinker_lock); + fs_info->extent_map_shrinker_last_ino = ctx.last_ino; + fs_info->extent_map_shrinker_last_root = ctx.last_root; + spin_unlock(&fs_info->extent_map_shrinker_lock); + if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) { s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps); - trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr); + trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, + nr, ctx.last_root, + ctx.last_ino); } return nr_dropped; diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 89f0650631cdf..833dc3fe0a38f 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -630,6 +630,7 @@ struct btrfs_fs_info { s32 delalloc_batch; struct percpu_counter evictable_extent_maps; + spinlock_t extent_map_shrinker_lock; u64 extent_map_shrinker_last_root; u64 extent_map_shrinker_last_ino; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 753db965f7c04..3a2b902b2d1f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10385,7 +10385,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, out_folios: for (i = 0; i < nr_folios; i++) { if (folios[i]) - __folio_put(folios[i]); + folio_put(folios[i]); } kvfree(folios); out: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bf0f81d59b6bc..39a15cca58ca9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3062,8 +3062,6 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_inherit *inherit, size_t size) { - if (!btrfs_qgroup_enabled(fs_info)) - return 0; if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) return -EOPNOTSUPP; if (size < sizeof(*inherit) || size > PAGE_SIZE) @@ -3084,6 +3082,14 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, if (size != struct_size(inherit, qgroups, inherit->num_qgroups)) return -EINVAL; + /* + * Skip the inherit source qgroups check if qgroup is not enabled. + * Qgroup can still be later enabled causing problems, but in that case + * btrfs_qgroup_inherit() would just ignore those invalid ones. + */ + if (!btrfs_qgroup_enabled(fs_info)) + return 0; + /* * Now check all the remaining qgroups, they should all: * diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index cf531255ab76c..9522a8b79d22b 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -441,7 +441,8 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, u32 item_size = btrfs_item_size(leaf, slot); unsigned long end, ptr; u64 offset, flags, count; - int type, ret; + int type; + int ret = 0; ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); @@ -486,7 +487,11 @@ static int process_extent_item(struct btrfs_fs_info *fs_info, key->objectid, key->offset); break; case BTRFS_EXTENT_OWNER_REF_KEY: - WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); + if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) { + btrfs_err(fs_info, + "found extent owner ref without simple quotas enabled"); + ret = -EINVAL; + } break; default: btrfs_err(fs_info, "invalid key type in iref"); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index d620323d08eae..ae8c56442549c 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -373,11 +373,18 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, * "optimal" chunk size based on the fs size. However when we actually * allocate the chunk we will strip this down further, making it no more * than 10% of the disk or 1G, whichever is smaller. 
+ * + * In zoned mode, we need to use zone_size (= + * data_sinfo->chunk_size) as it is. */ data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); - data_chunk_size = min(data_sinfo->chunk_size, - mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); - data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + if (!btrfs_is_zoned(fs_info)) { + data_chunk_size = min(data_sinfo->chunk_size, + mult_perc(fs_info->fs_devices->total_rw_bytes, 10)); + data_chunk_size = min_t(u64, data_chunk_size, SZ_1G); + } else { + data_chunk_size = data_sinfo->chunk_size; + } /* * Since data allocations immediately use block groups as part of the @@ -405,6 +412,17 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, avail >>= 3; else avail >>= 1; + + /* + * In zoned mode, we always allocate one zone as one chunk. + * Returning non-zone size aligned bytes here will result in + * less pressure for the async metadata reclaim process, and it + * will over-commit too much, leading to ENOSPC. Align down to the + * zone size to avoid that. + */ + if (btrfs_is_zoned(fs_info)) + avail = ALIGN_DOWN(avail, fs_info->zone_size); + return avail; } diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index f449f7340aad0..9fb06dc165202 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "internal.h" /* @@ -312,19 +313,59 @@ static void cachefiles_withdraw_objects(struct cachefiles_cache *cache) } /* - * Withdraw volumes. + * Withdraw fscache volumes. + */ +static void cachefiles_withdraw_fscache_volumes(struct cachefiles_cache *cache) +{ + struct list_head *cur; + struct cachefiles_volume *volume; + struct fscache_volume *vcookie; + + _enter(""); +retry: + spin_lock(&cache->object_list_lock); + list_for_each(cur, &cache->volumes) { + volume = list_entry(cur, struct cachefiles_volume, cache_link); + + if (atomic_read(&volume->vcookie->n_accesses) == 0) + continue; + + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (vcookie) { + spin_unlock(&cache->object_list_lock); + fscache_withdraw_volume(vcookie); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); + goto retry; + } + } + spin_unlock(&cache->object_list_lock); + + _leave(""); +} + +/* + * Withdraw cachefiles volumes. 
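+ * One volume is unpinned at a time: fscache_try_get_volume() guards against
+ * racing with a volume whose last reference is already being dropped, in
+ * which case we spin with cpu_relax() and retry until the removal completes.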
*/ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) { _enter(""); for (;;) { + struct fscache_volume *vcookie = NULL; struct cachefiles_volume *volume = NULL; spin_lock(&cache->object_list_lock); if (!list_empty(&cache->volumes)) { volume = list_first_entry(&cache->volumes, struct cachefiles_volume, cache_link); + vcookie = fscache_try_get_volume(volume->vcookie, + fscache_volume_get_withdraw); + if (!vcookie) { + spin_unlock(&cache->object_list_lock); + cpu_relax(); + continue; + } list_del_init(&volume->cache_link); } spin_unlock(&cache->object_list_lock); @@ -332,6 +373,7 @@ static void cachefiles_withdraw_volumes(struct cachefiles_cache *cache) break; cachefiles_withdraw_volume(volume); + fscache_put_volume(vcookie, fscache_volume_put_withdraw); } _leave(""); @@ -371,6 +413,7 @@ void cachefiles_withdraw_cache(struct cachefiles_cache *cache) pr_info("File cache on %s unregistering\n", fscache->name); fscache_withdraw_cache(fscache); + cachefiles_withdraw_fscache_volumes(cache); /* we now have to destroy all the active objects pertaining to this * cache - which we do by passing them off to thread pool to be diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 06cdf1a8a16f6..89b11336a8369 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -366,14 +366,14 @@ static __poll_t cachefiles_daemon_poll(struct file *file, if (cachefiles_in_ondemand_mode(cache)) { if (!xa_empty(&cache->reqs)) { - rcu_read_lock(); + xas_lock(&xas); xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { if (!cachefiles_ondemand_is_reopening_read(req)) { mask |= EPOLLIN; break; } } - rcu_read_unlock(); + xas_unlock(&xas); } } else { if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 6845a90cdfcce..7b99bd98de75b 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -48,6 +48,7 @@ enum cachefiles_object_state { CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ + CACHEFILES_ONDEMAND_OBJSTATE_DROPPING, /* Object is being dropped. 
*/ }; struct cachefiles_ondemand_info { @@ -128,6 +129,7 @@ struct cachefiles_cache { unsigned long req_id_next; struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; + u32 msg_id_next; }; static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) @@ -335,6 +337,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); +CACHEFILES_OBJECT_STATE_FUNCS(dropping, DROPPING); static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req) { diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index bce005f2b4563..470c966583850 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -517,7 +517,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, */ xas_lock(&xas); - if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + if (test_bit(CACHEFILES_DEAD, &cache->flags) || + cachefiles_ondemand_object_is_dropping(object)) { xas_unlock(&xas); ret = -EIO; goto out; @@ -527,20 +528,32 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, smp_mb(); if (opcode == CACHEFILES_OP_CLOSE && - !cachefiles_ondemand_object_is_open(object)) { + !cachefiles_ondemand_object_is_open(object)) { WARN_ON_ONCE(object->ondemand->ondemand_id == 0); xas_unlock(&xas); ret = -EIO; goto out; } - xas.xa_index = 0; + /* + * Cyclically find a free xas to avoid msg_id reuse that would + * cause the daemon to successfully copen a stale msg_id. + */ + xas.xa_index = cache->msg_id_next; xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) { + xas.xa_index = 0; + xas_find_marked(&xas, cache->msg_id_next - 1, XA_FREE_MARK); + } if (xas.xa_node == XAS_RESTART) xas_set_err(&xas, -EBUSY); + xas_store(&xas, req); - xas_clear_mark(&xas, XA_FREE_MARK); - xas_set_mark(&xas, CACHEFILES_REQ_NEW); + if (xas_valid(&xas)) { + cache->msg_id_next = xas.xa_index + 1; + xas_clear_mark(&xas, XA_FREE_MARK); + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + } xas_unlock(&xas); } while (xas_nomem(&xas, GFP_KERNEL)); @@ -568,7 +581,8 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, * If error occurs after creating the anonymous fd, * cachefiles_ondemand_fd_release() will set object to close. */ - if (opcode == CACHEFILES_OP_OPEN) + if (opcode == CACHEFILES_OP_OPEN && + !cachefiles_ondemand_object_is_dropping(object)) cachefiles_ondemand_set_object_close(object); kfree(req); return ret; @@ -667,8 +681,34 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) void cachefiles_ondemand_clean_object(struct cachefiles_object *object) { + unsigned long index; + struct cachefiles_req *req; + struct cachefiles_cache *cache; + + if (!object->ondemand) + return; + cachefiles_ondemand_send_req(object, CACHEFILES_OP_CLOSE, 0, cachefiles_ondemand_init_close_req, NULL); + + if (!object->ondemand->ondemand_id) + return; + + /* Cancel all requests for the object that is being dropped. */ + cache = object->volume->cache; + xa_lock(&cache->reqs); + cachefiles_ondemand_set_object_dropping(object); + xa_for_each(&cache->reqs, index, req) { + if (req->object == object) { + req->error = -EIO; + complete(&req->done); + __xa_erase(&cache->reqs, index); + } + } + xa_unlock(&cache->reqs); + + /* Wait for ondemand_object_worker() to finish to avoid UAF. 
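+ * A queued ondemand_object_worker() could otherwise still dereference
+ * object->ondemand after the object has been freed.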
*/ + cancel_work_sync(&object->ondemand->ondemand_work); } int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, diff --git a/fs/cachefiles/volume.c b/fs/cachefiles/volume.c index 89df0ba8ba5e7..781aac4ef274b 100644 --- a/fs/cachefiles/volume.c +++ b/fs/cachefiles/volume.c @@ -133,7 +133,6 @@ void cachefiles_free_volume(struct fscache_volume *vcookie) void cachefiles_withdraw_volume(struct cachefiles_volume *volume) { - fscache_withdraw_volume(volume->vcookie); cachefiles_set_volume_xattr(volume); __cachefiles_free_volume(volume); } diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index bcb6173943ee4..4dd8a993c60a8 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -110,9 +110,11 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file if (xlen == 0) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, tlen); if (xlen != tlen) { - if (xlen < 0) + if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(object, file_inode(file), xlen, cachefiles_trace_getxattr_error); + } if (xlen == -EIO) cachefiles_io_error_obj( object, @@ -252,6 +254,7 @@ int cachefiles_check_volume_xattr(struct cachefiles_volume *volume) xlen = vfs_getxattr(&nop_mnt_idmap, dentry, cachefiles_xattr_cache, buf, len); if (xlen != len) { if (xlen < 0) { + ret = xlen; trace_cachefiles_vfs_error(NULL, d_inode(dentry), xlen, cachefiles_trace_getxattr_error); if (xlen == -EIO) diff --git a/fs/dcache.c b/fs/dcache.c index 407095188f83a..4c144519aa709 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -355,7 +355,11 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; - if (flags & DCACHE_LRU_LIST) + /* + * The negative counter only tracks dentries on the LRU. Don't inc if + * d_lru is on another list. + */ + if ((flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_inc(nr_dentry_negative); } @@ -1844,9 +1848,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); /* - * Decrement negative dentry count if it was in the LRU list. + * The negative counter only tracks dentries on the LRU. Don't dec if + * d_lru is on another list. */ - if (dentry->d_flags & DCACHE_LRU_LIST) + if ((dentry->d_flags & + (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); @@ -3029,28 +3035,25 @@ EXPORT_SYMBOL(d_splice_alias); bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { - bool result; + bool subdir; unsigned seq; if (new_dentry == old_dentry) return true; - do { - /* for restarting inner loop in case of seq retry */ - seq = read_seqbegin(&rename_lock); - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); - if (d_ancestor(old_dentry, new_dentry)) - result = true; - else - result = false; - rcu_read_unlock(); - } while (read_seqretry(&rename_lock, seq)); - - return result; + /* Access d_parent under rcu as d_move() may change it. */ + rcu_read_lock(); + seq = read_seqbegin(&rename_lock); + subdir = d_ancestor(old_dentry, new_dentry); + /* Try lockless once... */ + if (read_seqretry(&rename_lock, seq)) { + /* ...else acquire lock for progress even on deep chains. 
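+ * A steady stream of renames could otherwise keep failing the lockless
+ * pass, so the locked fallback bounds the work to a single walk.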
*/ + read_seqlock_excl(&rename_lock); + subdir = d_ancestor(old_dentry, new_dentry); + read_sequnlock_excl(&rename_lock); + } + rcu_read_unlock(); + return subdir; } EXPORT_SYMBOL(is_subdir); diff --git a/fs/erofs/super.c b/fs/erofs/super.c index c93bd24d27717..1b91d95130138 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -343,7 +343,7 @@ static int erofs_read_superblock(struct super_block *sb) sbi->build_time = le64_to_cpu(dsb->build_time); sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); - memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); + super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid)); ret = strscpy(sbi->volume_name, dsb->volume_name, sizeof(dsb->volume_name)); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 9b248ee5fef2d..74d3d7bffcf3f 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -711,6 +711,8 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, err = z_erofs_do_map_blocks(inode, map, flags); out: + if (err) + map->m_llen = 0; trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); return err; } diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 036024bce9f7f..b80f612867c2b 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -148,7 +148,7 @@ int __init z_erofs_gbuf_init(void) void z_erofs_gbuf_exit(void) { - int i; + int i, j; for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) { struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i]; @@ -161,9 +161,9 @@ void z_erofs_gbuf_exit(void) if (!gbuf->pages) continue; - for (i = 0; i < gbuf->nrpages; ++i) - if (gbuf->pages[i]) - put_page(gbuf->pages[i]); + for (j = 0; j < gbuf->nrpages; ++j) + if (gbuf->pages[j]) + put_page(gbuf->pages[j]); kfree(gbuf->pages); gbuf->pages = NULL; } diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 5a400259ae74c..9a1a93e3888b9 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -696,7 +696,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) return err; } - strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + + strbuf = kzalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL); if (!strbuf) { res = -ENOMEM; diff --git a/fs/locks.c b/fs/locks.c index 90c8746874ded..bdd94c32256f5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1367,9 +1367,9 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, locks_wake_up_blocks(&left->c); } out: + trace_posix_lock_inode(inode, request, error); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); - trace_posix_lock_inode(inode, request, error); /* * Free any unused locks. */ @@ -2448,8 +2448,9 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by releasing the - * lock that was just acquired. There is no need to do that when we're + * Detect close/fcntl races and recover by zapping all POSIX locks + * associated with this file and our files_struct, just like on + * filp_flush(). There is no need to do that when we're * unlocking though, or for OFD locks. 
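 * The race window is another thread closing the fd between our lock
 * acquisition and the fd re-check below; the lock would otherwise be
 * left behind after the file is closed.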
*/ if (!error && file_lock->c.flc_type != F_UNLCK && @@ -2464,9 +2465,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { - file_lock->c.flc_type = F_UNLCK; - error = do_lock_file_wait(filp, cmd, file_lock); - WARN_ON_ONCE(error); + locks_remove_posix(filp, files); error = -EBADF; } } diff --git a/fs/minix/namei.c b/fs/minix/namei.c index d6031acc34f0c..a944a0f17b537 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -213,8 +213,7 @@ static int minix_rename(struct mnt_idmap *idmap, if (!new_de) goto out_dir; err = minix_set_link(new_de, new_page, old_inode); - kunmap(new_page); - put_page(new_page); + unmap_and_put_page(new_page, new_de); if (err) goto out_dir; inode_set_ctime_current(new_inode); diff --git a/fs/namei.c b/fs/namei.c index 37fb0a8aa09a0..1e05a0f3f04d9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3572,8 +3572,12 @@ static const char *open_last_lookups(struct nameidata *nd, else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); - if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED)) - fsnotify_create(dir->d_inode, dentry); + if (!IS_ERR(dentry)) { + if (file->f_mode & FMODE_CREATED) + fsnotify_create(dir->d_inode, dentry); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); + } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else @@ -3700,6 +3704,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap, mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); + if (file->f_mode & FMODE_OPENED) + fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a6bb03bea920c..4c0401dbbfcfa 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) - _debug("no unlock"); + kdebug("no unlock"); else folio_unlock(folio); } @@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl) struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); int ret; - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); + kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); if (readahead_count(ractl) == 0) return; @@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) struct folio *sink = NULL; int ret; - _enter("%lx", folio->index); + kenter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, folio_file_pos(folio), folio_size(folio), @@ -508,7 +508,7 @@ int netfs_write_begin(struct netfs_inode *ctx, have_folio: *_folio = folio; - _leave(" = 0"); + kleave(" = 0"); return 0; error_put: @@ -518,7 +518,7 @@ int netfs_write_begin(struct netfs_inode *ctx, folio_unlock(folio); folio_put(folio); } - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } EXPORT_SYMBOL(netfs_write_begin); @@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t flen = folio_size(folio); int ret; - _enter("%zx @%llx", flen, start); + kenter("%zx @%llx", flen, start); ret = -ENOMEM; @@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, error_put: netfs_put_request(rreq, false, 
netfs_rreq_trace_put_discard); error: - _leave(" = %d", ret); + kleave(" = %d", ret); return ret; } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 07bc1fd435309..ecbc99ec7d367 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, struct netfs_group *group = netfs_folio_group(folio); loff_t pos = folio_file_pos(folio); - _enter(""); + kenter(""); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) return NETFS_FLUSH_CONTENT; @@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, */ howto = netfs_how_to_modify(ctx, file, folio, netfs_group, flen, offset, part, maybe_trouble); - _debug("howto %u", howto); + kdebug("howto %u", howto); switch (howto) { case NETFS_JUST_PREFETCH: ret = netfs_prefetch_for_write(file, folio, offset, part); if (ret < 0) { - _debug("prefetch = %zd", ret); + kdebug("prefetch = %zd", ret); goto error_folio_unlock; } break; @@ -418,7 +418,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, } iocb->ki_pos += written; - _leave(" = %zd [%zd]", written, ret); + kleave(" = %zd [%zd]", written, ret); return written ? written : ret; error_folio_unlock: @@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct netfs_inode *ictx = netfs_inode(inode); ssize_t ret; - _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; @@ -523,17 +523,23 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr struct netfs_group *group; struct folio *folio = page_folio(vmf->page); struct file *file = vmf->vma->vm_file; + struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file); struct netfs_inode *ictx = netfs_inode(inode); vm_fault_t ret = VM_FAULT_RETRY; int err; - _enter("%lx", folio->index); + kenter("%lx", folio->index); sb_start_pagefault(inode->i_sb); if (folio_lock_killable(folio) < 0) goto out; + if (folio->mapping != mapping) { + folio_unlock(folio); + ret = VM_FAULT_NOPAGE; + goto out; + } if (folio_wait_writeback_killable(folio)) { ret = VM_FAULT_LOCKED; @@ -549,9 +555,9 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr group = netfs_folio_group(folio); if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) { folio_unlock(folio); - err = filemap_fdatawait_range(inode->i_mapping, - folio_pos(folio), - folio_pos(folio) + folio_size(folio)); + err = filemap_fdatawrite_range(mapping, + folio_pos(folio), + folio_pos(folio) + folio_size(folio)); switch (err) { case 0: ret = VM_FAULT_RETRY; diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 10a1e4da6bda5..b6debac6205f7 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i size_t orig_count = iov_iter_count(iter); bool async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); if (!orig_count) return 0; /* Don't update atime */ diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index e14cd53ac9fd7..792ef17bae21d 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * size_t len = iov_iter_count(iter); bool 
async = !is_sync_kiocb(iocb); - _enter(""); + kenter(""); /* We're going to need a bounce buffer if what we transmit is going to * be different in some way to the source buffer, e.g. because it gets @@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * */ // TODO - _debug("uw %llx-%llx", start, end); + kdebug("uw %llx-%llx", start, end); wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start, iocb->ki_flags & IOCB_DIRECT ? @@ -92,10 +92,11 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); if (async) wreq->iocb = iocb; + wreq->len = iov_iter_count(&wreq->io_iter); wreq->cleanup = netfs_cleanup_dio_write; - ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter)); + ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { - _debug("begin = %zd", ret); + kdebug("begin = %zd", ret); goto out; } @@ -142,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) loff_t pos = iocb->ki_pos; unsigned long long end = pos + iov_iter_count(from) - 1; - _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); + kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode)); if (!iov_iter_count(from)) return 0; diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 9397ed39b0b4e..288a73c3072d7 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache, { int n_accesses; - _enter("{%s,%s}", ops->name, cache->name); + kenter("{%s,%s}", ops->name, cache->name); BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING); @@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache, up_write(&fscache_addremove_sem); pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name); - _leave(" = 0 [%s]", cache->name); + kleave(" = 0 [%s]", cache->name); return 0; } EXPORT_SYMBOL(fscache_add_cache); diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0b..4d1e8bf4c615f 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie( { struct fscache_cookie *cookie; - _enter("V=%x", volume->debug_id); + kenter("V=%x", volume->debug_id); if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) return NULL; @@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie( trace_fscache_acquire(cookie); fscache_stat(&fscache_n_acquires_ok); - _leave(" = c=%08x", cookie->debug_id); + kleave(" = c=%08x", cookie->debug_id); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); @@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; bool need_withdraw = false; - _enter(""); + kenter(""); if (!cookie->volume->cache_priv) { fscache_create_volume(cookie->volume, true); @@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); need_withdraw = true; - _leave(" [fail]"); + kleave(" [fail]"); goto out; } @@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) bool queue = false; int n_active; - _enter("c=%08x", cookie->debug_id); + 
kenter("c=%08x", cookie->debug_id); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to use relinquished cookie\n")) @@ -636,7 +636,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) spin_unlock(&cookie->lock); if (queue) fscache_queue_cookie(cookie, fscache_cookie_get_use_work); - _leave(""); + kleave(""); } EXPORT_SYMBOL(__fscache_use_cookie); @@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) enum fscache_cookie_state state; bool wake = false; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); again: spin_lock(&cookie->lock); @@ -820,7 +820,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) spin_unlock(&cookie->lock); if (wake) wake_up_cookie_state(cookie); - _leave(""); + kleave(""); } static void fscache_cookie_worker(struct work_struct *work) @@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie) set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); spin_unlock(&cookie->lock); fscache_stat(&fscache_n_cookies_lru_expired); - _debug("lru c=%x", cookie->debug_id); + kdebug("lru c=%x", cookie->debug_id); __fscache_withdraw_cookie(cookie); } @@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) if (retire) fscache_stat(&fscache_n_relinquishes_retire); - _enter("c=%08x{%d},%d", + kenter("c=%08x{%d},%d", cookie->debug_id, atomic_read(&cookie->n_active), retire); if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), @@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, { bool is_caching; - _enter("c=%x", cookie->debug_id); + kenter("c=%x", cookie->debug_id); fscache_stat(&fscache_n_invalidates); @@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ default: spin_unlock(&cookie->lock); - _leave(" [no %u]", cookie->state); + kleave(" [no %u]", cookie->state); return; case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); - _leave(" [look %x]", cookie->inval_counter); + kleave(" [look %x]", cookie->inval_counter); return; case FSCACHE_COOKIE_STATE_ACTIVE: @@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie, if (is_caching) fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); - _leave(" [inv]"); + kleave(" [inv]"); return; } } diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index 38637e5c9b577..bf4eaeec44fba 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres, again: if (!fscache_cache_is_live(cookie->volume->cache)) { - _leave(" [broken]"); + kleave(" [broken]"); return false; } state = fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_CREATING: @@ -52,7 +52,7 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres, case FSCACHE_COOKIE_STATE_DROPPED: case FSCACHE_COOKIE_STATE_RELINQUISHING: default: - _leave(" [not live]"); + kleave(" [not live]"); return false; } @@ -92,7 +92,7 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres, spin_lock(&cookie->lock); state = 
fscache_cookie_state(cookie); - _enter("c=%08x{%u},%x", cookie->debug_id, state, want_state); + kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state); switch (state) { case FSCACHE_COOKIE_STATE_LOOKING_UP: @@ -140,7 +140,7 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres, cres->cache_priv = NULL; cres->ops = NULL; fscache_end_cookie_access(cookie, fscache_access_io_not_live); - _leave(" = -ENOBUFS"); + kleave(" = -ENOBUFS"); return -ENOBUFS; } @@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, if (len == 0) goto abandon; - _enter("%llx,%zx", start, len); + kenter("%llx,%zx", start, len); wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS); if (!wreq) diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c index 42e98bb523e36..bf9b33d26e312 100644 --- a/fs/netfs/fscache_main.c +++ b/fs/netfs/fscache_main.c @@ -99,7 +99,7 @@ int __init fscache_init(void) */ void __exit fscache_exit(void) { - _enter(""); + kenter(""); kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c index cdf991bdd9def..2e2a405ca9b02 100644 --- a/fs/netfs/fscache_volume.c +++ b/fs/netfs/fscache_volume.c @@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, return volume; } +struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where) +{ + int ref; + + if (!__refcount_inc_not_zero(&volume->ref, &ref)) + return NULL; + + trace_fscache_volume(volume->debug_id, ref + 1, where); + return volume; +} +EXPORT_SYMBOL(fscache_try_get_volume); + static void fscache_see_volume(struct fscache_volume *volume, enum fscache_volume_trace where) { @@ -251,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, fscache_see_volume(volume, fscache_volume_new_acquire); fscache_stat(&fscache_n_volumes); up_write(&fscache_addremove_sem); - _leave(" = v=%x", volume->debug_id); + kleave(" = v=%x", volume->debug_id); return volume; err_vol: @@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume, fscache_free_volume(volume); } } +EXPORT_SYMBOL(fscache_put_volume); /* * Relinquish a volume representation cookie. 
@@ -452,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume) { int n_accesses; - _debug("withdraw V=%x", volume->debug_id); + kdebug("withdraw V=%x", volume->debug_id); /* Allow wakeups on dec-to-0 */ n_accesses = atomic_dec_return(&volume->n_accesses); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 95e281a8af788..21e46bc9aa490 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -34,7 +34,6 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync); /* * main.c */ -extern unsigned int netfs_debug; extern struct list_head netfs_io_requests; extern spinlock_t netfs_proc_lock; extern mempool_t netfs_request_pool; @@ -63,15 +62,6 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} /* * misc.c */ -#define NETFS_FLAG_PUT_MARK BIT(0) -#define NETFS_FLAG_PAGECACHE_MARK BIT(1) -int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, - struct folio *folio, unsigned int flags, - gfp_t gfp_mask); -int netfs_add_folios_to_buffer(struct xarray *buffer, - struct address_space *mapping, - pgoff_t index, pgoff_t to, gfp_t gfp_mask); -void netfs_clear_buffer(struct xarray *buffer); /* * objects.c @@ -353,8 +343,6 @@ extern const struct seq_operations fscache_volumes_seq_ops; struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); -void fscache_put_volume(struct fscache_volume *volume, - enum fscache_volume_trace where); bool fscache_begin_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); @@ -365,42 +353,12 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait); * debug tracing */ #define dbgprintk(FMT, ...) \ - printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) + pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__) #define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__) #define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__) -#ifdef __KDEBUG -#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__) -#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__) -#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__) - -#elif defined(CONFIG_NETFS_DEBUG) -#define _enter(FMT, ...) \ -do { \ - if (netfs_debug) \ - kenter(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _leave(FMT, ...) \ -do { \ - if (netfs_debug) \ - kleave(FMT, ##__VA_ARGS__); \ -} while (0) - -#define _debug(FMT, ...) \ -do { \ - if (netfs_debug) \ - kdebug(FMT, ##__VA_ARGS__); \ -} while (0) - -#else -#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__) -#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__) -#define _debug(FMT, ...) 
no_printk(FMT, ##__VA_ARGS__) -#endif - /* * assertions */ diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b983688..c7576481c321d 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, if (count == remaining) return; - _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", rreq->debug_id, subreq->debug_index, iov_iter_count(&subreq->io_iter), subreq->transferred, subreq->len, rreq->i_size, @@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - _enter("R=%x[%x]{%llx,%lx},%zd", + kenter("R=%x[%x]{%llx,%lx},%zd", rreq->debug_id, subreq->debug_index, subreq->start, subreq->flags, transferred_or_error); @@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, struct netfs_inode *ictx = netfs_inode(rreq->inode); size_t lsize; - _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); + kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); if (rreq->origin != NETFS_DIO_READ) { source = netfs_cache_prepare_read(subreq, rreq->i_size); @@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, subreq->start = rreq->start + rreq->submitted; subreq->len = io_iter->count; - _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); + kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); /* Call out to the cache to find out what it can do with the remaining @@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) struct iov_iter io_iter; int ret; - _enter("R=%x %llx-%llx", + kenter("R=%x %llx-%llx", rreq->debug_id, rreq->start, rreq->start + rreq->len - 1); if (rreq->len == 0) { @@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) atomic_set(&rreq->nr_outstanding, 1); io_iter = rreq->io_iter; do { - _debug("submit %llx + %llx >= %llx", + kdebug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); if (rreq->origin == NETFS_DIO_READ && rreq->start + rreq->submitted >= rreq->i_size) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 5f0f438e5d211..db824c372842a 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -20,10 +20,6 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(netfs_sreq); -unsigned netfs_debug; -module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO); -MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask"); - static struct kmem_cache *netfs_request_slab; static struct kmem_cache *netfs_subrequest_slab; mempool_t netfs_request_pool; diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index bc1fc54fb7247..172808e83ca81 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -8,87 +8,6 @@ #include #include "internal.h" -/* - * Attach a folio to the buffer and maybe set marks on it to say that we need - * to put the folio later and twiddle the pagecache flags. 
- */ -int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, - struct folio *folio, unsigned int flags, - gfp_t gfp_mask) -{ - XA_STATE_ORDER(xas, xa, index, folio_order(folio)); - -retry: - xas_lock(&xas); - for (;;) { - xas_store(&xas, folio); - if (!xas_error(&xas)) - break; - xas_unlock(&xas); - if (!xas_nomem(&xas, gfp_mask)) - return xas_error(&xas); - goto retry; - } - - if (flags & NETFS_FLAG_PUT_MARK) - xas_set_mark(&xas, NETFS_BUF_PUT_MARK); - if (flags & NETFS_FLAG_PAGECACHE_MARK) - xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK); - xas_unlock(&xas); - return xas_error(&xas); -} - -/* - * Create the specified range of folios in the buffer attached to the read - * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that - * these need freeing later. - */ -int netfs_add_folios_to_buffer(struct xarray *buffer, - struct address_space *mapping, - pgoff_t index, pgoff_t to, gfp_t gfp_mask) -{ - struct folio *folio; - int ret; - - if (to + 1 == index) /* Page range is inclusive */ - return 0; - - do { - /* TODO: Figure out what order folio can be allocated here */ - folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0); - if (!folio) - return -ENOMEM; - folio->index = index; - ret = netfs_xa_store_and_mark(buffer, index, folio, - NETFS_FLAG_PUT_MARK, gfp_mask); - if (ret < 0) { - folio_put(folio); - return ret; - } - - index += folio_nr_pages(folio); - } while (index <= to && index != 0); - - return 0; -} - -/* - * Clear an xarray buffer, putting a ref on the folios that have - * NETFS_BUF_PUT_MARK set. - */ -void netfs_clear_buffer(struct xarray *buffer) -{ - struct folio *folio; - XA_STATE(xas, buffer, 0); - - rcu_read_lock(); - xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) { - folio_put(folio); - } - rcu_read_unlock(); - xa_destroy(buffer); -} - /** * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback * @mapping: The mapping the folio belongs to. 
@@ -107,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) struct fscache_cookie *cookie = netfs_i_cookie(ictx); bool need_use = false; - _enter(""); + kenter(""); if (!filemap_dirty_folio(mapping, folio)) return false; @@ -180,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo; size_t flen = folio_size(folio); - _enter("{%lx},%zx,%zx", folio->index, offset, length); + kenter("{%lx},%zx,%zx", folio->index, offset, length); if (!folio_test_private(folio)) return; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 426cf87aaf2ec..488147439fe0f 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, { struct list_head *next; - _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); + kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); if (list_empty(&stream->subrequests)) return; @@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) unsigned int notes; int s; - _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); + kenter("%llx-%llx", wreq->start, wreq->start + wreq->len); trace_netfs_collect(wreq); trace_netfs_rreq(wreq, netfs_rreq_trace_collect); @@ -409,7 +409,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) front = stream->front; while (front) { trace_netfs_collect_sreq(wreq, front); - //_debug("sreq [%x] %llx %zx/%zx", + //kdebug("sreq [%x] %llx %zx/%zx", // front->debug_index, front->start, front->transferred, front->len); /* Stall if there may be a discontinuity. */ @@ -598,7 +598,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) out: netfs_put_group_many(wreq->group, wreq->nr_group_rel); wreq->nr_group_rel = 0; - _leave(" = %x", notes); + kleave(" = %x", notes); return; need_retry: @@ -606,7 +606,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) * that any partially completed op will have had any wholly transferred * folios removed from it. 
*/ - _debug("retry"); + kdebug("retry"); netfs_retry_writes(wreq); goto out; } @@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work) size_t transferred; int s; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); netfs_see_request(wreq, netfs_rreq_trace_see_work); if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { @@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work) if (wreq->origin == NETFS_DIO_WRITE) inode_dio_end(wreq->inode); - _debug("finished"); + kdebug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); @@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, struct netfs_io_request *wreq = subreq->rreq; struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; - _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); + kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); switch (subreq->source) { case NETFS_UPLOAD_TO_SERVER: diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 3aa86e268f40d..d7c971df88660 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, if (IS_ERR(wreq)) return wreq; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) @@ -159,7 +159,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq, subreq->max_nr_segs = INT_MAX; subreq->stream_nr = stream->stream_nr; - _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); + kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index); trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), @@ -215,7 +215,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, { struct netfs_io_request *wreq = subreq->rreq; - _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); + kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) return netfs_write_subrequest_terminated(subreq, subreq->error, false); @@ -272,11 +272,11 @@ int netfs_advance_write(struct netfs_io_request *wreq, size_t part; if (!stream->avail) { - _leave("no write"); + kleave("no write"); return len; } - _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); + kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); if (subreq && start != subreq->start + subreq->len) { netfs_issue_write(wreq, stream); @@ -288,7 +288,7 @@ int netfs_advance_write(struct netfs_io_request *wreq, subreq = stream->construct; part = min(subreq->max_len - subreq->len, len); - _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); + kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len); subreq->len += part; subreq->nr_segs++; @@ -319,7 +319,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, bool to_eof = false, streamw = false; bool debug = false; - _enter(""); + kenter(""); /* netfs_perform_write() may shift i_size around the page or from out * of the page to beyond it, but cannot move i_size into or through the @@ -329,7 +329,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (fpos >= i_size) { /* mmap beyond eof. 
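 * The folio lies entirely past i_size, so it is marked as written back
 * immediately without issuing any I/O.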
*/ - _debug("beyond eof"); + kdebug("beyond eof"); folio_start_writeback(folio); folio_unlock(folio); wreq->nr_group_rel += netfs_folio_written_back(folio); @@ -363,7 +363,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq, } flen -= foff; - _debug("folio %zx %zx %zx", foff, flen, fsize); + kdebug("folio %zx %zx %zx", foff, flen, fsize); /* Deal with discontinuities in the stream of dirty pages. These can * arise from a number of sources: @@ -483,11 +483,11 @@ static int netfs_write_folio(struct netfs_io_request *wreq, if (!debug) kdebug("R=%x: No submit", wreq->debug_id); - if (flen < fsize) + if (foff + flen < fsize) for (int s = 0; s < NR_IO_STREAMS; s++) netfs_issue_write(wreq, &wreq->io_streams[s]); - _leave(" = 0"); + kleave(" = 0"); return 0; } @@ -522,7 +522,7 @@ int netfs_writepages(struct address_space *mapping, netfs_stat(&netfs_n_wh_writepages); do { - _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); + kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted); /* It appears we don't have to handle cyclic writeback wrapping. */ WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted); @@ -546,14 +546,14 @@ int netfs_writepages(struct address_space *mapping, mutex_unlock(&ictx->wb_lock); netfs_put_request(wreq, false, netfs_rreq_trace_put_return); - _leave(" = %d", error); + kleave(" = %d", error); return error; couldnt_start: netfs_kill_dirty_pages(mapping, wbc, folio); out: mutex_unlock(&ictx->wb_lock); - _leave(" = %d", error); + kleave(" = %d", error); return error; } EXPORT_SYMBOL(netfs_writepages); @@ -590,7 +590,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache) { - _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", + kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u", wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end); if (!*writethrough_cache) { @@ -624,7 +624,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr struct netfs_inode *ictx = netfs_inode(wreq->inode); int ret; - _enter("R=%x", wreq->debug_id); + kenter("R=%x", wreq->debug_id); if (writethrough_cache) netfs_write_folio(wreq, wbc, writethrough_cache); @@ -657,7 +657,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t loff_t start = wreq->start; int error = 0; - _enter("%zx", len); + kenter("%zx", len); if (wreq->origin == NETFS_DIO_WRITE) inode_dio_begin(wreq->inode); @@ -665,7 +665,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t while (len) { // TODO: Prepare content encryption - _debug("unbuffered %zx", len); + kdebug("unbuffered %zx", len); part = netfs_advance_write(wreq, upload, start, len, false); start += part; len -= part; @@ -684,6 +684,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t if (list_empty(&upload->subrequests)) netfs_wake_write_collector(wreq, false); - _leave(" = %d", error); + kleave(" = %d", error); return error; } diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 89caef7513db3..ba50388ee4bf1 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -377,11 +377,12 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, * @target: offset number of an entry in the group (start point) * @bsize: size in bits * @lock: spin lock protecting @bitmap + * @wrap: whether to wrap around */ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, 
unsigned long target, unsigned int bsize, - spinlock_t *lock) + spinlock_t *lock, bool wrap) { int pos, end = bsize; @@ -397,6 +398,8 @@ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, end = target; } + if (!wrap) + return -ENOSPC; /* wrap around */ for (pos = 0; pos < end; pos++) { @@ -495,9 +498,10 @@ int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp) * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object * @inode: inode of metadata file using this allocator * @req: nilfs_palloc_req structure exchanged for the allocation + * @wrap: whether to wrap around */ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, - struct nilfs_palloc_req *req) + struct nilfs_palloc_req *req, bool wrap) { struct buffer_head *desc_bh, *bitmap_bh; struct nilfs_palloc_group_desc *desc; @@ -516,7 +520,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, entries_per_group = nilfs_palloc_entries_per_group(inode); for (i = 0; i < ngroups; i += n) { - if (group >= ngroups) { + if (group >= ngroups && wrap) { /* wrap around */ group = 0; maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, @@ -550,7 +554,14 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( - bitmap, group_offset, entries_per_group, lock); + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. + */ kunmap_local(bitmap_kaddr); if (pos >= 0) goto found; diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index b667e869ac076..d825a9faca6d9 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -50,8 +50,8 @@ struct nilfs_palloc_req { struct buffer_head *pr_entry_bh; }; -int nilfs_palloc_prepare_alloc_entry(struct inode *, - struct nilfs_palloc_req *); +int nilfs_palloc_prepare_alloc_entry(struct inode *inode, + struct nilfs_palloc_req *req, bool wrap); void nilfs_palloc_commit_alloc_entry(struct inode *, struct nilfs_palloc_req *); void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 180fc8d36213d..fc1caf63a42ae 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -75,7 +75,7 @@ int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req) { int ret; - ret = nilfs_palloc_prepare_alloc_entry(dat, req); + ret = nilfs_palloc_prepare_alloc_entry(dat, req, true); if (ret < 0) return ret; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 52e50b1b7f22d..4a29b0138d75f 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -135,6 +135,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr) goto Enamelen; if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) goto Espan; + if (unlikely(p->inode && + NILFS_PRIVATE_INODE(le64_to_cpu(p->inode)))) + goto Einumber; } if (offs != limit) goto Eend; @@ -160,6 +163,9 @@ static bool nilfs_check_folio(struct folio *folio, char *kaddr) goto bad_entry; Espan: error = "directory entry across blocks"; + goto bad_entry; +Einumber: + error = "disallowed inode number"; bad_entry: nilfs_error(sb, "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", @@ -377,11 +383,39 @@ struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, struct nilfs_dir_entry *nilfs_dotdot(struct inode 
*dir, struct folio **foliop) { - struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop); + struct folio *folio; + struct nilfs_dir_entry *de, *next_de; + size_t limit; + char *msg; + de = nilfs_get_folio(dir, 0, &folio); if (IS_ERR(de)) return NULL; - return nilfs_next_entry(de); + + limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */ + if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino || + !nilfs_match(1, ".", de))) { + msg = "missing '.'"; + goto fail; + } + + next_de = nilfs_next_entry(de); + /* + * If "next_de" has not reached the end of the chunk, there is + * at least one more record. Check whether it matches "..". + */ + if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) || + !nilfs_match(2, "..", next_de))) { + msg = "missing '..'"; + goto fail; + } + *foliop = folio; + return next_de; + +fail: + nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); + folio_release_kmap(folio, de); + return NULL; } ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 612e609158b52..1e86b9303b7ca 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -56,13 +56,10 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, struct nilfs_palloc_req req; int ret; - req.pr_entry_nr = 0; /* - * 0 says find free inode from beginning - * of a group. dull code!! - */ + req.pr_entry_nr = NILFS_FIRST_INO(ifile->i_sb); req.pr_entry_bh = NULL; - ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); + ret = nilfs_palloc_prepare_alloc_entry(ifile, &req, false); if (!ret) { ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, &req.pr_entry_bh); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 728e90be3570b..4017f78564405 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -116,9 +116,15 @@ enum { #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino) #define NILFS_MDT_INODE(sb, ino) \ - ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & BIT(ino))) + ((ino) < NILFS_USER_INO && (NILFS_MDT_INO_BITS & BIT(ino))) #define NILFS_VALID_INODE(sb, ino) \ - ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & BIT(ino))) + ((ino) >= NILFS_FIRST_INO(sb) || \ + ((ino) < NILFS_USER_INO && (NILFS_SYS_INO_BITS & BIT(ino)))) + +#define NILFS_PRIVATE_INODE(ino) ({ \ + ino_t __ino = (ino); \ + ((__ino) < NILFS_USER_INO && (__ino) != NILFS_ROOT_INO && \ + (__ino) != NILFS_SKETCH_INO); }) /** * struct nilfs_transaction_info: context information for synchronization diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index f41d7b6d432c6..e44dde57ab652 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -452,6 +452,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, } nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); + if (nilfs->ns_first_ino < NILFS_USER_INO) { + nilfs_err(nilfs->ns_sb, + "too small lower limit for non-reserved inode numbers: %u", + nilfs->ns_first_ino); + return -EINVAL; + } nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 85da0629415df..1e829ed7b0ef5 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -182,7 +182,7 @@ struct the_nilfs { unsigned long ns_nrsvsegs; unsigned long ns_first_data_block; int ns_inode_size; - int ns_first_ino; + unsigned int ns_first_ino; u32 ns_crc_seed; /* /sys/fs// */ diff --git a/fs/open.c b/fs/open.c 
index 50e45bc7c4d8b..278b3edcda444 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f, } } - /* - * Once we return a file with FMODE_OPENED, __fput() will call - * fsnotify_close(), so we need fsnotify_open() here for symmetry. - */ - fsnotify_open(f); return 0; cleanup_all: @@ -1085,8 +1080,19 @@ EXPORT_SYMBOL(file_path); */ int vfs_open(const struct path *path, struct file *file) { + int ret; + file->f_path = *path; - return do_dentry_open(file, NULL); + ret = do_dentry_open(file, NULL); + if (!ret) { + /* + * Once we return a file with FMODE_OPENED, __fput() will call + * fsnotify_close(), so we need fsnotify_open() here for + * symmetry. + */ + fsnotify_open(file); + } + return ret; } struct file *dentry_open(const struct path *path, int flags, @@ -1177,8 +1183,10 @@ struct file *kernel_file_open(const struct path *path, int flags, error = do_dentry_open(f, NULL); if (error) { fput(f); - f = ERR_PTR(error); + return ERR_PTR(error); } + + fsnotify_open(f); return f; } EXPORT_SYMBOL_GPL(kernel_file_open); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 557b68e99d0a0..a865941724c02 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1918,8 +1918,8 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ -#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) -#define CIFSSEC_MAX (CIFSSEC_MUST_NTLMV2) +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) +#define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* ***************************************************************** diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index f1f2573bb18df..1374635e89fae 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -245,35 +245,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) return 0; } -/* - * Expand the size of a readahead to the size of the rsize, if at least as - * large as a page, allowing for the possibility that rsize is not pow-2 - * aligned. - */ -static void cifs_expand_readahead(struct netfs_io_request *rreq) -{ - unsigned int rsize = rreq->rsize; - loff_t misalignment, i_size = i_size_read(rreq->inode); - - if (rsize < PAGE_SIZE) - return; - - if (rsize < INT_MAX) - rsize = roundup_pow_of_two(rsize); - else - rsize = ((unsigned int)INT_MAX + 1) / 2; - - misalignment = rreq->start & (rsize - 1); - if (misalignment) { - rreq->start -= misalignment; - rreq->len += misalignment; - } - - rreq->len = round_up(rreq->len, rsize); - if (rreq->start < i_size && rreq->len > i_size - rreq->start) - rreq->len = i_size - rreq->start; -} - /* * Completion of a request operation. 
*/ @@ -329,7 +300,6 @@ const struct netfs_request_ops cifs_req_ops = { .init_request = cifs_init_request, .free_request = cifs_free_request, .free_subrequest = cifs_free_subrequest, - .expand_readahead = cifs_expand_readahead, .clamp_length = cifs_clamp_length, .issue_read = cifs_req_issue_read, .done = cifs_rreq_done, diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8d10be1fe18a8..c3ee42188d252 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -917,6 +917,40 @@ struct smb2_query_directory_rsp { __u8 Buffer[]; } __packed; +/* DeviceType Flags */ +#define FILE_DEVICE_CD_ROM 0x00000002 +#define FILE_DEVICE_CD_ROM_FILE_SYSTEM 0x00000003 +#define FILE_DEVICE_DFS 0x00000006 +#define FILE_DEVICE_DISK 0x00000007 +#define FILE_DEVICE_DISK_FILE_SYSTEM 0x00000008 +#define FILE_DEVICE_FILE_SYSTEM 0x00000009 +#define FILE_DEVICE_NAMED_PIPE 0x00000011 +#define FILE_DEVICE_NETWORK 0x00000012 +#define FILE_DEVICE_NETWORK_FILE_SYSTEM 0x00000014 +#define FILE_DEVICE_NULL 0x00000015 +#define FILE_DEVICE_PARALLEL_PORT 0x00000016 +#define FILE_DEVICE_PRINTER 0x00000018 +#define FILE_DEVICE_SERIAL_PORT 0x0000001b +#define FILE_DEVICE_STREAMS 0x0000001e +#define FILE_DEVICE_TAPE 0x0000001f +#define FILE_DEVICE_TAPE_FILE_SYSTEM 0x00000020 +#define FILE_DEVICE_VIRTUAL_DISK 0x00000024 +#define FILE_DEVICE_NETWORK_REDIRECTOR 0x00000028 + +/* Device Characteristics */ +#define FILE_REMOVABLE_MEDIA 0x00000001 +#define FILE_READ_ONLY_DEVICE 0x00000002 +#define FILE_FLOPPY_DISKETTE 0x00000004 +#define FILE_WRITE_ONCE_MEDIA 0x00000008 +#define FILE_REMOTE_DEVICE 0x00000010 +#define FILE_DEVICE_IS_MOUNTED 0x00000020 +#define FILE_VIRTUAL_VOLUME 0x00000040 +#define FILE_DEVICE_SECURE_OPEN 0x00000100 +#define FILE_CHARACTERISTIC_TS_DEVICE 0x00001000 +#define FILE_CHARACTERISTIC_WEBDAV_DEVICE 0x00002000 +#define FILE_PORTABLE_DEVICE 0x00004000 +#define FILE_DEVICE_ALLOW_APPCONTAINER_TRAVERSAL 0x00020000 + /* * Maximum number of iovs we need for a set-info request. 
* The largest one is rename/hardlink diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e7e07891781b3..840c71c66b30b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2051,15 +2051,22 @@ int smb2_tree_connect(struct ksmbd_work *work) * @access: file access flags * @disposition: file disposition flags * @may_flags: set with MAY_ flags + * @is_dir: true if creating open flags for a directory * * Return: file open flags */ static int smb2_create_open_flags(bool file_present, __le32 access, __le32 disposition, - int *may_flags) + int *may_flags, + bool is_dir) { int oflags = O_NONBLOCK | O_LARGEFILE; + if (is_dir) { + access &= ~FILE_WRITE_DESIRE_ACCESS_LE; + ksmbd_debug(SMB, "Discard write access to a directory\n"); + } + if (access & FILE_READ_DESIRED_ACCESS_LE && access & FILE_WRITE_DESIRE_ACCESS_LE) { oflags |= O_RDWR; @@ -3167,7 +3174,9 @@ int smb2_open(struct ksmbd_work *work) open_flags = smb2_create_open_flags(file_present, daccess, req->CreateDisposition, - &may_flags); + &may_flags, + req->CreateOptions & FILE_DIRECTORY_FILE_LE || + (file_present && S_ISDIR(d_inode(path.dentry)->i_mode))); if (!test_tree_conn_flag(tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { if (open_flags & (O_CREAT | O_TRUNC)) { @@ -5314,8 +5323,13 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info = (struct filesystem_device_info *)rsp->Buffer; - info->DeviceType = cpu_to_le32(stfs.f_type); - info->DeviceCharacteristics = cpu_to_le32(0x00000020); + info->DeviceType = cpu_to_le32(FILE_DEVICE_DISK); + info->DeviceCharacteristics = + cpu_to_le32(FILE_DEVICE_IS_MOUNTED); + if (!test_tree_conn_flag(work->tcon, + KSMBD_TREE_CONN_FLAG_WRITABLE)) + info->DeviceCharacteristics |= + cpu_to_le32(FILE_READ_ONLY_DEVICE); rsp->OutputBufferLength = cpu_to_le32(8); break; } diff --git a/fs/super.c b/fs/super.c index b72f1d288e954..095ba793e10cf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1502,8 +1502,17 @@ static int fs_bdev_thaw(struct block_device *bdev) lockdep_assert_held(&bdev->bd_fsfreeze_mutex); + /* + * The block device may have been frozen before it was claimed by a + * filesystem. Concurrently another process might try to mount that + * frozen block device and has temporarily claimed the block device for + * that purpose, causing a concurrent fs_bdev_thaw() to end up here. The + * mounter is already about to abort mounting because they still saw an + * elevated bdev->bd_fsfreeze_count so get_bdev_super() will return + * NULL in that case.
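To make the effect of the new is_dir parameter to smb2_create_open_flags() above concrete, here is a reduced user-space model (not the kernel code; WANT_READ/WANT_WRITE are simplified stand-ins for the FILE_*_ACCESS_LE masks):

#define _GNU_SOURCE		/* for O_LARGEFILE */
#include <fcntl.h>

#define WANT_READ	0x1
#define WANT_WRITE	0x2

static int model_open_flags(unsigned int access, int is_dir)
{
	int oflags = O_NONBLOCK | O_LARGEFILE;

	if (is_dir)
		access &= ~WANT_WRITE;	/* never open a directory for write */

	if ((access & WANT_READ) && (access & WANT_WRITE))
		oflags |= O_RDWR;
	else if (access & WANT_WRITE)
		oflags |= O_WRONLY;
	else
		oflags |= O_RDONLY;

	return oflags;
}

With is_dir set, the write bit is cleared before the O_RDWR/O_WRONLY decision, so a directory always ends up opened read-only.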
+ */ sb = get_bdev_super(bdev); - if (WARN_ON_ONCE(!sb)) + if (!sb) return -EINVAL; if (sb->s_op->thaw_super) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index eee7320ab0b02..17e409ceaa336 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -2057,7 +2057,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, goto out; features = uffdio_api.features; ret = -EINVAL; - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + if (uffdio_api.api != UFFD_API) goto err_out; ret = -EPERM; if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) @@ -2081,6 +2081,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; #endif + + ret = -EINVAL; + if (features & ~uffdio_api.features) + goto err_out; + uffdio_api.ioctls = UFFD_API_IOCTLS; ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) diff --git a/include/drm/amd_rdma.h b/include/drm/amd_rdma.h new file mode 100644 index 0000000000000..99682afae6754 --- /dev/null +++ b/include/drm/amd_rdma.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2015-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* @file This file defines kernel interfaces to communicate with amdkfd */ + +#ifndef AMD_RDMA_H_ +#define AMD_RDMA_H_ + +/* API versions: + * 1.0 Original API until ROCm 4.1, AMD_RDMA_MAJOR/MINOR undefined + * 2.0 Added IOMMU (dma-mapping) support, removed p2p_info.kfd_proc + * Introduced AMD_RDMA_MAJOR/MINOR version definition + */ +#define AMD_RDMA_MAJOR 2 +#define AMD_RDMA_MINOR 0 + +/** + * Structure describing information needed for P2P access from another device + * to a specific location of GPU memory + */ +struct amd_p2p_info { + uint64_t va; /**< Specify user virt. address + * which this page table + * describes + */ + uint64_t size; /**< Specify total size of + * allocation + */ + struct pid *pid; /**< Specify process pid to which + * virtual address belongs + */ + struct sg_table *pages; /**< Specify DMA/Bus addresses */ + void *priv; /**< Pointer set by AMD kernel + * driver + */ +}; + +/** + * Structure providing function pointers to support RDMA/P2P access to + * specific locations of GPU memory.
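The fs/userfaultfd.c change above turns a feature request the kernel cannot honor into a hard -EINVAL instead of silently masking it. A user-space sketch of the UFFDIO_API handshake it affects (UFFD_FEATURE_WP_ASYNC is only an example of a feature bit an older kernel may not advertise):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>

int main(void)
{
	int fd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	struct uffdio_api api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_WP_ASYNC,	/* may be unsupported */
	};

	if (fd < 0)
		return 1;
	if (ioctl(fd, UFFDIO_API, &api) < 0)
		perror("UFFDIO_API");	/* now fails with EINVAL if unsupported */
	else
		printf("granted features: 0x%llx\n",
		       (unsigned long long)api.features);
	close(fd);
	return 0;
}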
+ */ +struct amd_rdma_interface { + int (*get_pages)(uint64_t address, uint64_t length, struct pid *pid, + struct device *dma_dev, + struct amd_p2p_info **amd_p2p_data, + void (*free_callback)(void *client_priv), + void *client_priv); + int (*put_pages)(struct amd_p2p_info **amd_p2p_data); + int (*is_gpu_address)(uint64_t address, struct pid *pid); + int (*get_page_size)(uint64_t address, uint64_t length, struct pid *pid, + unsigned long *page_size); +}; + + +int amdkfd_query_rdma_interface(const struct amd_rdma_interface **rdma); + + +#endif /* AMD_RDMA_H_ */ diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 173548c6473a9..a78d23c91c8d4 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1016,6 +1016,8 @@ # define DP_EDP_BACKLIGHT_FREQ_AUX_SET_CAP (1 << 5) # define DP_EDP_DYNAMIC_BACKLIGHT_CAP (1 << 6) # define DP_EDP_VBLANK_BACKLIGHT_UPDATE_CAP (1 << 7) +#define DP_EDP_OLED_VESA_BRIGHTNESS_ON 0x80 +# define DP_EDP_OLED_VESA_CAP (1 << 4) #define DP_EDP_GENERAL_CAP_2 0x703 # define DP_EDP_OVERDRIVE_ENGINE_ENABLED (1 << 0) diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94f..dd466631f174f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -244,18 +244,18 @@ struct drm_dp_mst_branch { bool link_address_sent; /* global unique identifier to identify branch devices */ - u8 guid[16]; + guid_t guid; }; struct drm_dp_nak_reply { - u8 guid[16]; + guid_t guid; u8 reason; u8 nak_data; }; struct drm_dp_link_address_ack_reply { - u8 guid[16]; + guid_t guid; u8 nports; struct drm_dp_link_addr_reply_port { bool input_port; @@ -265,7 +265,7 @@ struct drm_dp_link_address_ack_reply { bool ddps; bool legacy_device_plug_status; u8 dpcd_revision; - u8 peer_guid[16]; + guid_t peer_guid; u8 num_sdp_streams; u8 num_sdp_stream_sinks; } ports[16]; @@ -348,7 +348,7 @@ struct drm_dp_allocate_payload_ack_reply { }; struct drm_dp_connection_status_notify { - u8 guid[16]; + guid_t guid; u8 port_number; bool legacy_device_plug_status; bool displayport_device_plug_status; @@ -425,7 +425,7 @@ struct drm_dp_query_payload { struct drm_dp_resource_status_notify { u8 port_number; - u8 guid[16]; + guid_t guid; u16 available_pbn; }; diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 63767cf24371b..c91f87b5242d7 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -213,8 +213,9 @@ struct drm_device { * This can be set to true if the hardware has a working vblank counter * with high-precision timestamping (otherwise there are races) and the * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. + * appropriately. Also, see @max_vblank_count, + * &drm_crtc_funcs.get_vblank_counter and + * &drm_vblank_crtc_config.disable_immediate.
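The drm_dp_mst_helper.h conversion above replaces raw u8[16] GUID arrays with guid_t. A sketch of what that buys call sites (the helper name is hypothetical; assumes <linux/uuid.h>):

#include <linux/types.h>
#include <linux/uuid.h>

/* Hypothetical helper: compare a stored guid_t against raw bytes from a
 * sideband message without open-coding memcmp() on u8[16] buffers. */
static bool example_guid_matches(const guid_t *want, const u8 raw[16])
{
	guid_t got;

	import_guid(&got, raw);
	return guid_equal(want, &got);
}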
*/ bool vblank_disable_immediate; diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 2a1d01e5b56b8..fa085c44d4ca4 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf); int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags); int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index c8f829b4307cb..151ab1e85b1b7 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -78,6 +78,31 @@ struct drm_pending_vblank_event { } event; }; +/** + * struct drm_vblank_crtc_config - vblank configuration for a CRTC + */ +struct drm_vblank_crtc_config { + /** + * @offdelay_ms: Vblank off delay in ms, used to determine how long + * &drm_vblank_crtc.disable_timer waits before disabling. + * + * Defaults to the value of drm_vblank_offdelay in drm_crtc_vblank_on(). + */ + int offdelay_ms; + + /** + * @disable_immediate: See &drm_device.vblank_disable_immediate + * for the exact semantics of immediate vblank disabling. + * + * Additionally, this tracks the disable immediate value per crtc, just + * in case it needs to differ from the default value for a given device. + * + * Defaults to the value of &drm_device.vblank_disable_immediate in + * drm_crtc_vblank_on(). + */ + bool disable_immediate; +}; + /** * struct drm_vblank_crtc - vblank tracking for a CRTC * @@ -99,8 +124,8 @@ struct drm_vblank_crtc { wait_queue_head_t queue; /** * @disable_timer: Disable timer for the delayed vblank disabling - * hysteresis logic. Vblank disabling is controlled through the - * drm_vblank_offdelay module option and the setting of the + * hysteresis logic. Vblank disabling is controlled through + * &drm_vblank_crtc_config.offdelay_ms and the setting of the * &drm_device.max_vblank_count value. */ struct timer_list disable_timer; @@ -198,6 +223,12 @@ struct drm_vblank_crtc { */ struct drm_display_mode hwmode; + /** + * @config: Stores vblank configuration values for a given CRTC. + * Also, see drm_crtc_vblank_on_config(). + */ + struct drm_vblank_crtc_config config; + /** * @enabled: Tracks the enabling state of the corresponding &drm_crtc to * avoid double-disabling and hence corrupting saved state. Needed by @@ -247,6 +278,8 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe); void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config); void drm_crtc_vblank_on(struct drm_crtc *crtc); u64 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc); void drm_crtc_vblank_restore(struct drm_crtc *crtc); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5acc64954a883..150c5906c590f 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -363,10 +363,12 @@ struct drm_sched_job { * drm_sched_job_add_dependency() and * drm_sched_job_add_implicit_dependencies(). 
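A usage sketch for the drm_crtc_vblank_on_config() entry point declared above (the driver callback and the chosen values are hypothetical; unset fields otherwise default as documented):

#include <drm/drm_vblank.h>

/* Hypothetical enable path for a CRTC that wants its vblank machinery
 * kept on instead of being disabled after the usual off-delay. */
static void example_crtc_atomic_enable(struct drm_crtc *crtc)
{
	const struct drm_vblank_crtc_config config = {
		.offdelay_ms = 0,	/* assumption: 0 means never auto-disable */
		.disable_immediate = false,
	};

	drm_crtc_vblank_on_config(crtc, &config);
}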
*/ +#ifdef HAVE_STRUCT_XARRAY struct xarray dependencies; /** @last_dependency: tracks @dependencies as they signal */ unsigned long last_dependency; +#endif /** * @submit_ts: @@ -554,6 +556,7 @@ int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, u32 credits, void *owner); void drm_sched_job_arm(struct drm_sched_job *job); + int drm_sched_job_add_dependency(struct drm_sched_job *job, struct dma_fence *fence); int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job, @@ -567,7 +570,6 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, struct drm_gem_object *obj, bool write); - void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, struct drm_gpu_scheduler **sched_list, unsigned int num_sched_list); diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ef0f52f56ebc6..3ea0f0a7b0c93 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -35,8 +35,13 @@ #include #include +#include #include "ttm_device.h" +#ifndef HAVE_CONFIG_H +#define HAVE_DRM_GEM_OBJECT_RESV 1 +#define HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG 1 +#endif /* Default number of pre-faulted pages in the TTM fault handler */ #if CONFIG_PGTABLE_LEVELS > 2 @@ -139,6 +144,11 @@ struct ttm_buffer_object { * reservation lock. */ struct sg_table *sg; + +#if !defined(HAVE_DRM_GEM_OBJECT_RESV) + struct dma_resv *resv; + struct dma_resv ttm_resv; +#endif }; #define TTM_BO_MAP_IOMEM_MASK 0x80 @@ -222,6 +232,14 @@ ttm_bo_get_unless_zero(struct ttm_buffer_object *bo) return bo; } +#if defined(HAVE_DRM_GEM_OBJECT_RESV) +#define amdkcl_ttm_resv(bo) ((bo)->base._resv) +#define amdkcl_ttm_resvp(bo) ((bo)->base.resv) +#else +#define amdkcl_ttm_resv(bo) ((bo)->ttm_resv) +#define amdkcl_ttm_resvp(bo) ((bo)->resv) +#endif + /** * ttm_bo_reserve: * @@ -256,14 +274,14 @@ static inline int ttm_bo_reserve(struct ttm_buffer_object *bo, if (WARN_ON(ticket)) return -EBUSY; - success = dma_resv_trylock(bo->base.resv); + success = dma_resv_trylock(amdkcl_ttm_resvp(bo)); return success ? 0 : -EBUSY; } if (interruptible) - ret = dma_resv_lock_interruptible(bo->base.resv, ticket); + ret = dma_resv_lock_interruptible(amdkcl_ttm_resvp(bo), ticket); else - ret = dma_resv_lock(bo->base.resv, ticket); + ret = dma_resv_lock(amdkcl_ttm_resvp(bo), ticket); if (ret == -EINTR) return -ERESTARTSYS; return ret; @@ -284,13 +302,13 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, struct ww_acquire_ctx *ticket) { if (interruptible) { - int ret = dma_resv_lock_slow_interruptible(bo->base.resv, + int ret = dma_resv_lock_slow_interruptible(amdkcl_ttm_resvp(bo), ticket); if (ret == -EINTR) ret = -ERESTARTSYS; return ret; } - dma_resv_lock_slow(bo->base.resv, ticket); + dma_resv_lock_slow(amdkcl_ttm_resvp(bo), ticket); return 0; } @@ -335,7 +353,7 @@ static inline void ttm_bo_move_null(struct ttm_buffer_object *bo, static inline void ttm_bo_unreserve(struct ttm_buffer_object *bo) { ttm_bo_move_to_lru_tail_unlocked(bo); - dma_resv_unlock(bo->base.resv); + dma_resv_unlock(amdkcl_ttm_resvp(bo)); } /** @@ -382,21 +400,43 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo); + +/** + * ttm_bo_mmap - mmap out of the ttm device address space. + * + * @filp: filp as input from the mmap method. + * @vma: vma as input from the mmap method. 
+ * @bdev: Pointer to the ttm_device with the address space manager. + * + * This function is intended to be called by the device mmap method + * if the device address space is to be backed by the bo manager. + */ +int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, + struct ttm_device *bdev); + int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); + int ttm_mem_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_operation_ctx *ctx, struct ww_acquire_ctx *ticket); + vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgprot_t prot, pgoff_t num_prefault); + +#if defined(HAVE_VM_OPERATIONS_STRUCT_FAULT_1ARG) vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf); +#else +vm_fault_t ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +#endif + void ttm_bo_vm_open(struct vm_area_struct *vma); void ttm_bo_vm_close(struct vm_area_struct *vma); int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index c22f30535c848..3aab0c5a62bad 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -151,6 +151,24 @@ struct ttm_device_funcs { struct ttm_resource *new_mem, struct ttm_place *hop); +#ifdef HAVE_STRUCT_DRM_DRV_GEM_OPEN_OBJECT_CALLBACK + /** + * struct ttm_bo_driver member verify_access + * + * @bo: Pointer to a buffer object. + * @filp: Pointer to a struct file trying to access the object. + * + * Called from the map / write / read methods to verify that the + * caller is permitted to access the buffer object. + * This member may be set to NULL, which will refuse this kind of + * access for all buffer objects. + * This function should return 0 if access is granted, -EPERM otherwise. + */ + int (*verify_access)(struct ttm_buffer_object *bo, + struct file *filp); +#endif + + /** * Hook to notify driver about a resource delete. */ diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69769355139f2..e909fc0cba01c 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -36,7 +36,7 @@ #include #define TTM_MAX_BO_PRIORITY 4U -#define TTM_NUM_MEM_TYPES 8 +#define TTM_NUM_MEM_TYPES 12 struct ttm_device; struct ttm_resource_manager; diff --git a/include/kcl/backport/kcl_bitmap.h b/include/kcl/backport/kcl_bitmap.h new file mode 100644 index 0000000000000..1382530929001 --- /dev/null +++ b/include/kcl/backport/kcl_bitmap.h @@ -0,0 +1,38 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KCL_BACKPORT_KCL_BITMAP_H_ +#define __KCL_BACKPORT_KCL_BITMAP_H_ + +#include +#include + +#ifndef HAVE_BITMAP_FUNCS +#define bitmap_alloc kcl_bitmap_alloc +#define bitmap_zalloc kcl_bitmap_zalloc +#define bitmap_free kcl_bitmap_free +#endif /* HAVE_BITMAP_FUNCS */ + +#ifndef HAVE_BITMAP_TO_ARR32 +#define bitmap_to_arr32 kcl_bitmap_to_arr32 +#endif /* HAVE_BITMAP_TO_ARR32 */ + +#endif /* KCL_BITMAP_H */ diff --git a/include/kcl/backport/kcl_device_cgroup_backport.h b/include/kcl/backport/kcl_device_cgroup_backport.h new file mode 100644 index 0000000000000..9bac47907e956 --- /dev/null +++ b/include/kcl/backport/kcl_device_cgroup_backport.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_DEVICE_CGROUP_BACKPORT_H +#define AMDKCL_DEVICE_CGROUP_BACKPORT_H + +#include + +#ifndef HAVE_DEVCGROUP_CHECK_PERMISSION +#define devcgroup_check_permission _kcl_devcgroup_check_permission +#endif /* HAVE_DEVCGROUP_CHECK_PERMISSION */ +#endif diff --git a/include/kcl/backport/kcl_dm_tracepoint.h b/include/kcl/backport/kcl_dm_tracepoint.h new file mode 100644 index 0000000000000..7d0e772c51ece --- /dev/null +++ b/include/kcl/backport/kcl_dm_tracepoint.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KCL_BACKPORT_KCL_DM_TRACEPOINT_H +#define KCL_BACKPORT_KCL_DM_TRACEPOINT_H + +#ifndef DECLARE_EVENT_NOP +#define DECLARE_EVENT_NOP(name, proto, args) \ + static inline void trace_##name(proto) \ + { } \ + static inline bool trace_##name##_enabled(void) \ + { \ + return false; \ + } + +#define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) \ + DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) + +#define DEFINE_EVENT_NOP(template, name, proto, args) \ + DECLARE_EVENT_NOP(name, PARAMS(proto), PARAMS(args)) +#endif + +#endif /* KCL_BACKPORT_KCL_DM_TRACEPOINT_H */ diff --git a/include/kcl/backport/kcl_drm_atomic_helper_backport.h b/include/kcl/backport/kcl_drm_atomic_helper_backport.h new file mode 100644 index 0000000000000..eaa2464b77353 --- /dev/null +++ b/include/kcl/backport/kcl_drm_atomic_helper_backport.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2014 Red Hat + * Copyright (C) 2014 Intel Corp. + * Copyright (C) 2018 Intel Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark + * Daniel Vetter + */ +#ifndef AMDKCL_DRM_ATOMIC_HELPER_BACKPORT_H +#define AMDKCL_DRM_ATOMIC_HELPER_BACKPORT_H + +#include + +/* + * commit v4.14-rc4-1-g78279127253a + * drm/atomic: Unref duplicated drm_atomic_state in drm_atomic_helper_resume() + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 15, 0) +static inline +int _kcl_drm_atomic_helper_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + unsigned int prev, after; + int ret; + + prev = kref_read(&state->ref); + + drm_atomic_state_get(state); + ret = drm_atomic_helper_resume(dev, state); + + after = kref_read(&state->ref); + drm_atomic_state_put(state); + if (prev != after) + drm_atomic_state_put(state); + + return ret; +} +#define drm_atomic_helper_resume _kcl_drm_atomic_helper_resume +#endif + +#ifdef AMDKCL__DRM_ATOMIC_HELPER_PLANE_RESET +#define __drm_atomic_helper_plane_reset _kcl__drm_atomic_helper_plane_reset +#endif /* AMDKCL__DRM_ATOMIC_HELPER_PLANE_RESET */ + +#endif diff --git a/include/kcl/backport/kcl_drm_backport.h b/include/kcl/backport/kcl_drm_backport.h new file mode 100644 index 0000000000000..c17c10af84c09 --- /dev/null +++ b/include/kcl/backport/kcl_drm_backport.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDKCL_DRM_BACKPORT_H +#define AMDKCL_DRM_BACKPORT_H + +/* + * commit v4.10-rc3-539-g086f2e5cde74 + * drm: debugfs: Remove all files automatically on cleanup + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 11, 0) +#define AMDKCL_AMDGPU_DEBUGFS_CLEANUP +#endif + +#if DRM_VERSION_CODE >= DRM_VERSION(4, 17, 0) +#define AMDKCL_AMDGPU_DMABUF_OPS +#endif + +/* + * commit v5.4-rc4-1120-gb3fac52c5193 + * drm: share address space for dma bufs + */ +#if DRM_VERSION_CODE < DRM_VERSION(5, 5, 0) +#define AMDKCL_DMA_BUF_SHARE_ADDR_SPACE +#endif + +/* + * commit v4.13-rc2-365-g144a7999d633 + * drm: Handle properties in the core for atomic drivers + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 14, 0) +#define AMDKCL_DRM_CONNECTOR_FUNCS_DPMS_MANDATORY +#endif + +#endif/*AMDKCL_DRM_BACKPORT_H*/ diff --git a/include/kcl/backport/kcl_drm_cache_backport.h b/include/kcl/backport/kcl_drm_cache_backport.h new file mode 100644 index 0000000000000..bc936463073e5 --- /dev/null +++ b/include/kcl/backport/kcl_drm_cache_backport.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef AMDKCL_DRM_CACHE_BACKPORT_H +#define AMDKCL_DRM_CACHE_BACKPORT_H + +#include +#include + +#define drm_arch_can_wc_memory kcl_drm_arch_can_wc_memory + +#endif /* AMDKCL_DRM_CACHE_BACKPORT_H */ diff --git a/include/kcl/backport/kcl_drm_dp_helper_backport.h b/include/kcl/backport/kcl_drm_dp_helper_backport.h new file mode 100644 index 0000000000000..61b4a14bb0151 --- /dev/null +++ b/include/kcl/backport/kcl_drm_dp_helper_backport.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_DRM_DP_HELPER_BACKPORT_H_ +#define _KCL_DRM_DP_HELPER_BACKPORT_H_ + +#include + +/* + * commit v4.19-rc1-100-g5ce70c799ac2 + * drm_dp_cec: check that aux has a transfer function + */ +#if defined(AMDKCL_DRM_DP_CEC_XXX_CHECK_CB) +#define drm_dp_cec_irq _kcl_drm_dp_cec_irq +#define drm_dp_cec_set_edid _kcl_drm_dp_cec_set_edid +#define drm_dp_cec_unset_edid _kcl_drm_dp_cec_unset_edid +#endif + +#if 
!defined(HAVE_DRM_DP_CEC_REGISTER_CONNECTOR_PP) +#define drm_dp_cec_register_connector _kcl_drm_dp_cec_register_connector +#endif + +#if !defined(HAVE_DRM_DP_READ_DPCD_CAPS) +int _kcl_drm_dp_read_dpcd_caps(struct drm_dp_aux *aux, + u8 dpcd[DP_RECEIVER_CAP_SIZE]); +static inline int drm_dp_read_dpcd_caps(struct drm_dp_aux *aux, + u8 dpcd[DP_RECEIVER_CAP_SIZE]) +{ + return _kcl_drm_dp_read_dpcd_caps(aux, dpcd); +} +#endif +#endif diff --git a/include/kcl/backport/kcl_drm_dp_mst_helper_backport.h b/include/kcl/backport/kcl_drm_dp_mst_helper_backport.h new file mode 100644 index 0000000000000..a84cd2ac22cc2 --- /dev/null +++ b/include/kcl/backport/kcl_drm_dp_mst_helper_backport.h @@ -0,0 +1,120 @@ +/* + * Copyright © 2014 Red Hat + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
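All of these kcl_*_backport.h headers follow one pattern, reduced here to a self-contained skeleton in which every name is hypothetical: probe for a HAVE_* flag at configure time, and when the running kernel lacks the newer API, define a _kcl_ wrapper with the new signature on top of the old call and substitute it with the preprocessor:

#ifndef HAVE_FOO_WITH_FLAGS	/* hypothetical configure-time probe */
static inline int _kcl_foo_with_flags(struct foo_dev *dev, unsigned int flags)
{
	/* Older kernels only offer the flagless variant; drop the argument. */
	return foo(dev);
}
#define foo_with_flags _kcl_foo_with_flags
#endif

Because the substitution happens via #define, driver code is written once against the newest signature and still compiles on older kernels.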
+ */ +#ifndef _KCL_DRM_DP_MST_HELPER_BACKPORT_H_ +#define _KCL_DRM_DP_MST_HELPER_BACKPORT_H_ + +#include + +/* Copied from drivers/gpu/drm/drm_dp_mst_topology.c and modified for KCL */ +#ifndef HAVE_DRM_DP_CALC_PBN_MODE_3ARGS +static inline +int _kcl_drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc) +{ +#ifndef HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H + if (dsc) + return DIV_ROUND_UP_ULL(mul_u32_u32(clock * (bpp / 16), 64 * 1006), + 8 * 54 * 1000 * 1000); +#endif + return drm_dp_calc_pbn_mode(clock, bpp +#ifdef HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H + << 4 +#endif + ); +} +#define drm_dp_calc_pbn_mode _kcl_drm_dp_calc_pbn_mode +#endif + + +#if !defined(HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS) +#if !defined(HAVE_DRM_DP_ATOMIC_FIND_VCPI_SLOTS_5ARGS) +static inline +int _kcl_drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, int pbn, + int pbn_div) +{ + int pbn_backup; + int req_slots; + + if (pbn_div > 0) { + pbn_backup = mgr->pbn_div; + mgr->pbn_div = pbn_div; + } + + req_slots = drm_dp_atomic_find_vcpi_slots(state, mgr, port, pbn); + + if (pbn_div > 0) + mgr->pbn_div = pbn_backup; + + return req_slots; +} +#define drm_dp_atomic_find_vcpi_slots _kcl_drm_dp_atomic_find_vcpi_slots +#endif /* HAVE_DRM_DP_ATOMIC_FIND_VCPI_SLOTS_5ARGS */ + +static inline +int _kcl_drm_dp_atomic_find_time_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, int pbn, + int pbn_div) +{ + return drm_dp_atomic_find_vcpi_slots(state, mgr, port, pbn, pbn_div); +} +#define drm_dp_atomic_find_time_slots _kcl_drm_dp_atomic_find_time_slots +#endif /* HAVE_DRM_DP_ATOMIC_FIND_TIME_SLOTS */ + +#if !defined(HAVE_DRM_DP_ATOMIC_RELEASE_TIME_SLOTS) +#ifdef HAVE_DRM_DP_ATOMIC_RELEASE_VCPI_SLOTS_MST_PORT +static inline +int _kcl_drm_dp_atomic_release_time_slots(struct drm_atomic_state *state, + struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port) +{ + return drm_dp_atomic_release_vcpi_slots(state, mgr, port); +} +#define drm_dp_atomic_release_time_slots _kcl_drm_dp_atomic_release_time_slots +#endif +#endif + +#ifndef HAVE_DRM_DP_MST_TOPOLOGY_MGR_RESUME_2ARGS +static inline int +_kcl_drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, + bool sync) +{ + return drm_dp_mst_topology_mgr_resume(mgr); +} +#define drm_dp_mst_topology_mgr_resume _kcl_drm_dp_mst_topology_mgr_resume +#endif + +#ifdef HAVE_DRM_DP_ADD_PAYLOAD_PART2_THREE_ARGUMENTS +static inline int +_kcl_drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_atomic_payload *payload) +{ + struct drm_dp_mst_topology_state *mst_state; + + mst_state = to_drm_dp_mst_topology_state(mgr->base.state); + return drm_dp_add_payload_part2(mgr, mst_state->base.state, payload); +} +#define drm_dp_add_payload_part2 _kcl_drm_dp_add_payload_part2 +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_drv.h b/include/kcl/backport/kcl_drm_drv.h new file mode 100644 index 0000000000000..2fd32a57bb5d5 --- /dev/null +++ b/include/kcl/backport/kcl_drm_drv.h @@ -0,0 +1,58 @@ +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * Copyright 2016 Intel Corp. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KCL_BACKPORT_KCL_DRM_DRV_H__ +#define __KCL_BACKPORT_KCL_DRM_DRV_H__ + +#include +/* + * v5.1-rc5-1150-gbd53280ef042 drm/drv: Fix incorrect resolution of merge conflict + * v5.1-rc2-5-g3f04e0a6cfeb drm: Fix drm_release() and device unplug + */ +#if DRM_VERSION_CODE < DRM_VERSION(5, 2, 0) +static inline +void _kcl_drm_dev_unplug(struct drm_device *dev) +{ + unsigned int prev, post; + + drm_dev_get(dev); + + prev = kref_read(&dev->ref); + drm_dev_unplug(dev); + post = kref_read(&dev->ref); + + if (prev == post) + drm_dev_put(dev); +} +#define drm_dev_unplug _kcl_drm_dev_unplug +#endif + + +#ifndef HAVE_DRM_FIRMWARE_DRIVERS_ONLY +#define drm_firmware_drivers_only vgacon_text_force +#endif /* HAVE_DRM_FIRMWARE_DRIVERS_ONLY */ + +#endif diff --git a/include/kcl/backport/kcl_drm_edid.h b/include/kcl/backport/kcl_drm_edid.h new file mode 100644 index 0000000000000..2076f6fe8b2b2 --- /dev/null +++ b/include/kcl/backport/kcl_drm_edid.h @@ -0,0 +1,20 @@ +#ifndef AMDKCL_BACKPORT_DRM_EDID_H +#define AMDKCL_BACKPORT_DRM_EDID_H + +#include + +#if !defined(HAVE_DRM_EDID_OVERRIDE_CONNECTOR_UPDATE) +#ifdef HAVE_DRM_ADD_OVERRIDE_EDID_MODES +static inline int _kcl_drm_edid_override_connector_update(struct drm_connector *connector) +{ + int ret; + + ret = drm_add_override_edid_modes(connector); + return ret; +} + +#define drm_edid_override_connector_update _kcl_drm_edid_override_connector_update +#endif +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_exec.h b/include/kcl/backport/kcl_drm_exec.h new file mode 100644 index 0000000000000..0d86a455cabdc --- /dev/null +++ b/include/kcl/backport/kcl_drm_exec.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_BACKPORT_KCL_DRM_EXEC_H +#define AMDKCL_BACKPORT_KCL_DRM_EXEC_H + +#include +#include + +#ifndef HAVE_DRM_EXEC_INIT_3_ARGUMENTS +static inline +void _kcl_drm_exec_init(struct drm_exec *exec, uint32_t flags, unsigned nr) +{ + return drm_exec_init(exec, flags); +} + +#define drm_exec_init _kcl_drm_exec_init +#endif /* HAVE_DRM_EXEC_INIT_3_ARGUMENTS */ + +#endif diff --git a/include/kcl/backport/kcl_drm_fb.h b/include/kcl/backport/kcl_drm_fb.h new file mode 100644 index 0000000000000..4b869a664735f --- /dev/null +++ b/include/kcl/backport/kcl_drm_fb.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Intel Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * 
documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ +#ifndef KCL_BACKPORT_KCL_DRM_FB_H +#define KCL_BACKPORT_KCL_DRM_FB_H + +#include +#include + +#endif diff --git a/include/kcl/backport/kcl_drm_fbdev_ttm.h b/include/kcl/backport/kcl_drm_fbdev_ttm.h new file mode 100644 index 0000000000000..03ddc7699ddb1 --- /dev/null +++ b/include/kcl/backport/kcl_drm_fbdev_ttm.h @@ -0,0 +1,16 @@ +#ifndef __KCL_BACKPORT_KCL_DRM_FBDEV_TTM_H__ +#define __KCL_BACKPORT_KCL_DRM_FBDEV_TTM_H__ + +#include +#include + +#ifndef HAVE_DRM_DRM_FBDEV_TTM_H +static inline +void _kcl_drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ + return drm_fbdev_generic_setup(dev, preferred_bpp); +} +#define drm_fbdev_ttm_setup _kcl_drm_fbdev_ttm_setup +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_gem.h b/include/kcl/backport/kcl_drm_gem.h new file mode 100644 index 0000000000000..6f04e71ca35fd --- /dev/null +++ b/include/kcl/backport/kcl_drm_gem.h @@ -0,0 +1,45 @@ +/* + * GEM Graphics Execution Manager Driver Interfaces + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * All rights reserved. + * Copyright © 2014 Intel Corporation + * Daniel Vetter + * + * Author: Rickard E. (Rik) Faith + * Author: Gareth Hughes + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE.
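A usage sketch for the drm_fbdev_ttm_setup() shim above (the load callback is hypothetical): the driver calls the new name unconditionally, and on kernels without drm/drm_fbdev_ttm.h the macro routes the call to drm_fbdev_generic_setup():

static int example_driver_load(struct drm_device *dev)
{
	/* 32 bpp preferred; resolves to drm_fbdev_generic_setup() on
	 * kernels that predate the fbdev-ttm helper */
	drm_fbdev_ttm_setup(dev, 32);
	return 0;
}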
+ */ + +#ifndef __KCL_BACKPORT_KCL_DRM_GEM_H__ +#define __KCL_BACKPORT_KCL_DRM_GEM_H__ + +#include + +#if defined(HAVE_DRM_GEM_OBJECT_PUT) +#if defined(HAVE_DRM_GEM_OBJECT_PUT_SYMBOL) +#define drm_gem_object_put _kcl_drm_gem_object_put +#endif +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_prime.h b/include/kcl/backport/kcl_drm_prime.h new file mode 100644 index 0000000000000..de796e551b712 --- /dev/null +++ b/include/kcl/backport/kcl_drm_prime.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2012 Red Hat + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Dave Airlie + * Rob Clark + * + */ + +// Copied from include/drm/drm_prime.h +#ifndef _KCL_BACKPORT_KCL__DRM_PRIME_H__H_ +#define _KCL_BACKPORT_KCL__DRM_PRIME_H__H_ + +#ifdef HAVE_DRM_DRMP_H +#include +#else +#include +#endif + +#ifndef HAVE_DRM_PRIME_PAGES_TO_SG_3ARGS +static inline +struct sg_table *_kcl_drm_prime_pages_to_sg(struct drm_device *dev, + struct page **pages, unsigned int nr_pages) +{ + pr_warn_once("legacy kernel with drm_prime_pages_to_sg() ignore segment size limits, which is buggy\n"); + return drm_prime_pages_to_sg(pages, nr_pages); +} +#define drm_prime_pages_to_sg _kcl_drm_prime_pages_to_sg +#endif + +#ifndef HAVE_DRM_GEM_PRIME_HANDLE_TO_FD +int _kcl_drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd); +#define drm_gem_prime_handle_to_fd _kcl_drm_gem_prime_handle_to_fd + +int _kcl_drm_gem_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, int prime_fd, + uint32_t *handle); +#define drm_gem_prime_fd_to_handle _kcl_drm_gem_prime_fd_to_handle +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_print.h b/include/kcl/backport/kcl_drm_print.h new file mode 100644 index 0000000000000..5dc86123ce523 --- /dev/null +++ b/include/kcl/backport/kcl_drm_print.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2016 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
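A usage sketch for the drm_prime_pages_to_sg() wrapper above (the caller is hypothetical): new-style callers pass the drm_device so the scatterlist can respect the DMA device's maximum segment size, while on kernels with only the 2-argument variant the wrapper drops the argument and warns once:

static struct sg_table *example_make_sgt(struct drm_device *dev,
					 struct page **pages,
					 unsigned int npages)
{
	return drm_prime_pages_to_sg(dev, pages, npages);
}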
+ * + * Authors: + * Rob Clark + */ + + +// Copied from include/drm/drm_print.h #ifndef _KCL_BACKPORT_KCL__DRM_PRINT_H__H_ +#define _KCL_BACKPORT_KCL__DRM_PRINT_H__H_ + +#include +#include + +#if !defined(HAVE_DRM_PRINT_BITS_4ARGS) && \ + defined(HAVE_DRM_PRINT_BITS) static inline +void _kcl_drm_print_bits(struct drm_printer *p, unsigned long value, + const char * const bits[], unsigned int nbits) +{ + WARN_ON_ONCE(fls(value) > nbits); + + /* the 5-argument variant takes an explicit bit range; print all of + * [0, nbits) to match the 4-argument semantics */ + drm_print_bits(p, value, bits, 0, nbits); +} +#define drm_print_bits _kcl_drm_print_bits +#endif + + +#ifndef HAVE_DRM_DBG_PRINTER +static inline +struct drm_printer _kcl_drm_dbg_printer(struct drm_device *drm, + enum drm_debug_category category, + const char *prefix) +{ + return drm_debug_printer(prefix); +} +#define drm_dbg_printer _kcl_drm_dbg_printer +#endif + +#endif diff --git a/include/kcl/backport/kcl_drm_probe_helper.h b/include/kcl/backport/kcl_drm_probe_helper.h new file mode 100644 index 0000000000000..3ac7310361bb4 --- /dev/null +++ b/include/kcl/backport/kcl_drm_probe_helper.h @@ -0,0 +1,16 @@ +#ifndef AMDKCL_BACKPORT_DRM_PROBE_HELPER_H +#define AMDKCL_BACKPORT_DRM_PROBE_HELPER_H + +#include + +#ifndef HAVE_DRM_KMS_HELPER_CONNECTOR_HOTPLUG_EVENT +static inline void _kcl_drm_kms_helper_connector_hotplug_event(struct drm_connector *connector) +{ + drm_kms_helper_hotplug_event(connector->dev); +} + +#define drm_kms_helper_connector_hotplug_event _kcl_drm_kms_helper_connector_hotplug_event + + +#endif +#endif diff --git a/include/kcl/backport/kcl_drm_vma_manager_backport.h b/include/kcl/backport/kcl_drm_vma_manager_backport.h new file mode 100644 index 0000000000000..b7b16df2f6d2c --- /dev/null +++ b/include/kcl/backport/kcl_drm_vma_manager_backport.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2013 David Herrmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef AMDKCL_DRM_VMA_MANAGER_H +#define AMDKCL_DRM_VMA_MANAGER_H + +/* We make up offsets for buffer objects so we can recognize them at + * mmap time.
pgoff in mmap is an unsigned long, so we need to make sure + * that the faked up offset will fit + */ +#include +#include + +/* Copied from include/drm/drm_vma_manager.h */ +#if (BITS_PER_LONG == 64) +#ifdef DRM_FILE_PAGE_OFFSET_START +#undef DRM_FILE_PAGE_OFFSET_START +#endif +#ifdef DRM_FILE_PAGE_OFFSET_SIZE +#undef DRM_FILE_PAGE_OFFSET_SIZE +#endif + +#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFULL >> PAGE_SHIFT) + 1) +#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFULL >> PAGE_SHIFT) * 4096) + +static inline void +kcl_drm_vma_offset_manager_adjust(struct drm_vma_offset_manager *mgr) +{ + u64 size; + + BUG_ON(!mgr); + + size = mgr->vm_addr_space_mm.head_node.hole_size; + if (size < DRM_FILE_PAGE_OFFSET_SIZE) + drm_vma_offset_manager_destroy(mgr); + else + return; + + drm_vma_offset_manager_init(mgr, + DRM_FILE_PAGE_OFFSET_START, + DRM_FILE_PAGE_OFFSET_SIZE); +} +#else +static inline void +kcl_drm_vma_offset_manager_adjust(struct drm_vma_offset_manager *mgr) +{ +} +#endif + +#endif diff --git a/include/kcl/backport/kcl_fence_backport.h b/include/kcl/backport/kcl_fence_backport.h new file mode 100644 index 0000000000000..a29c3293c6c88 --- /dev/null +++ b/include/kcl/backport/kcl_fence_backport.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef AMDKCL_FENCE_BACKPORT_H +#define AMDKCL_FENCE_BACKPORT_H +#include +#include + +/* + * commit v4.18-rc2-533-g418cc6ca0607 + * dma-fence: Allow wait_any_timeout for all fences) + */ +#ifdef AMDKCL_FENCE_WAIT_ANY_TIMEOUT +#define dma_fence_wait_any_timeout _kcl_fence_wait_any_timeout +#endif + +/* + * commit v4.9-rc2-472-gbcc004b629d2 + * dma-buf/fence: make timeout handling in fence_default_wait consistent (v2)) + * + * commit v4.9-rc2-473-g698c0f7ff216 + * dma-buf/fence: revert "don't wait when specified timeout is zero" (v2) + */ +#ifdef AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT + +#ifdef dma_fence_default_wait +#undef dma_fence_default_wait +#endif + +#define dma_fence_default_wait _kcl_fence_default_wait +#define dma_fence_wait_timeout _kcl_fence_wait_timeout +#endif + +/* + * commit v4.14-rc3-601-g5f72db59160c + * dma-buf/fence: Sparse wants __rcu on the object itself + */ +#ifdef AMDKCL_FENCE_GET_RCU_SAFE +#define dma_fence_get_rcu_safe _kcl_fence_get_rcu_safe +#endif +#endif diff --git a/include/kcl/backport/kcl_fs.h b/include/kcl/backport/kcl_fs.h new file mode 100644 index 0000000000000..200c92cd0f82f --- /dev/null +++ b/include/kcl/backport/kcl_fs.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_BACKPORT_KCL_FS_H +#define _KCL_BACKPORT_KCL_FS_H + +#include +#include + +#ifndef HAVE_KERNEL_WRITE_PPOS +#define kernel_write _kcl_kernel_write +#endif + +#endif diff --git a/include/kcl/backport/kcl_hmm.h b/include/kcl/backport/kcl_hmm.h new file mode 100644 index 0000000000000..7dad7453aaa89 --- /dev/null +++ b/include/kcl/backport/kcl_hmm.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2013 Red Hat Inc. + * + * Authors: Jérôme Glisse + * + * See Documentation/vm/hmm.rst for reasons and overview of what HMM is. 
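The 64-bit fake-offset window chosen by kcl_drm_vma_offset_manager_adjust() above can be checked with a stand-alone sketch (PAGE_SHIFT is hard-coded to 12 here, i.e. 4 KiB pages, which is an assumption):

#include <stdio.h>

#define PAGE_SHIFT	12ULL	/* assumption: 4 KiB pages */
#define WINDOW_START	((0xFFFFFFFFULL >> PAGE_SHIFT) + 1)
#define WINDOW_SIZE	((0xFFFFFFFFULL >> PAGE_SHIFT) * 4096)

int main(void)
{
	/* Offsets are in pages, as drm_vma_offset_manager_init() expects:
	 * the window starts one page past the 32-bit range, i.e. at the
	 * 4 GiB boundary in byte terms, so BO offsets cannot collide with
	 * legacy 32-bit mmap offsets. */
	printf("start: pgoff 0x%llx (byte offset 0x%llx)\n",
	       WINDOW_START, WINDOW_START << PAGE_SHIFT);
	printf("size : 0x%llx pages\n", WINDOW_SIZE);
	return 0;
}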
+ */ +#ifndef _KCL_BACKPORT_KCL_HMM_H +#define _KCL_BACKPORT_KCL_HMM_H + +#ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED +#include + +#ifndef HAVE_HMM_RANGE_FAULT_1ARG +static inline +int _kcl_hmm_range_fault(struct hmm_range *range) +{ + return hmm_range_fault(range, 0); +} +#define hmm_range_fault _kcl_hmm_range_fault +#endif /* HAVE_HMM_RANGE_FAULT_1ARG */ + +#endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ +#endif diff --git a/include/kcl/backport/kcl_io_backport.h b/include/kcl/backport/kcl_io_backport.h new file mode 100644 index 0000000000000..8fe78d238e6f4 --- /dev/null +++ b/include/kcl/backport/kcl_io_backport.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2006 PathScale, Inc. All Rights Reserved. + */ +#ifndef AMDKCL_IO_H +#define AMDKCL_IO_H + +#include +#include + +/* Copied from arch/x86/include/asm/io.h + * include/linux/io.h + */ +#if !defined(HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC) + +#ifdef CONFIG_X86 +extern int _kcl_arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void _kcl_arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc _kcl_arch_io_reserve_memtype_wc +#define arch_io_free_memtype_wc _kcl_arch_io_free_memtype_wc +#endif + +#ifndef arch_io_reserve_memtype_wc +/* + * On x86 PAT systems we have memory tracking that keeps track of + * the allowed mappings on memory ranges. This tracking works for + * all the in-kernel mapping APIs (ioremap*), but where the user + * wishes to map a range from a physical device into user memory + * the tracking won't be updated. This API is to be used by + * drivers which remap physical device pages into userspace, + * and wants to make sure they are mapped WC and not UC. + */ +static inline int arch_io_reserve_memtype_wc(resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline void arch_io_free_memtype_wc(resource_size_t base, + resource_size_t size) +{ +} +#endif + +#endif /* HAVE_ARCH_IO_RESERVE_FREE_MEMTYPE_WC */ + +#endif /* AMDKCL_IO_H */ diff --git a/include/kcl/backport/kcl_kthread_backport.h b/include/kcl/backport/kcl_kthread_backport.h new file mode 100644 index 0000000000000..60732dc17f10e --- /dev/null +++ b/include/kcl/backport/kcl_kthread_backport.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_KTHREAD_BACKPORT_H +#define AMDKCL_KTHREAD_BACKPORT_H +#include +#include +#include + +#if !defined(HAVE___KTHREAD_SHOULD_PARK) +#define __kthread_should_park __kcl_kthread_should_park +#endif + +#endif diff --git a/include/kcl/backport/kcl_mce.h b/include/kcl/backport/kcl_mce.h new file mode 100644 index 0000000000000..08c69209a1a49 --- /dev/null +++ b/include/kcl/backport/kcl_mce.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_BACKPORT_KCL_MCE_H +#define _KCL_BACKPORT_KCL_MCE_H + +#include + +#ifdef CONFIG_X86_MCE_AMD +#ifndef HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS +#define smca_get_bank_type _kcl_smca_get_bank_type +#endif /* HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS */ +#endif /* CONFIG_X86_MCE_AMD */ + +#endif /* _KCL_BACKPORT_KCL_MCE_H */ diff --git a/include/kcl/backport/kcl_migrate.h b/include/kcl/backport/kcl_migrate.h new file mode 100644 index 0000000000000..55a817d8cf2aa --- /dev/null +++ b/include/kcl/backport/kcl_migrate.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_BACKPORT_KCL_MIGRATE_H +#define _KCL_BACKPORT_KCL_MIGRATE_H + +#include + +/* Compatibility with kernels before ab09243aa95a ("mm/migrate.c: remove 
+ * MIGRATE_PFN_LOCKED") + */ +#ifndef MIGRATE_PFN_LOCKED +#define MIGRATE_PFN_LOCKED 0 +#endif + +#endif diff --git a/include/kcl/backport/kcl_mm_backport.h b/include/kcl/backport/kcl_mm_backport.h new file mode 100644 index 0000000000000..27c77cd60bbea --- /dev/null +++ b/include/kcl/backport/kcl_mm_backport.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_MM_BACKPORT_H +#define AMDKCL_MM_BACKPORT_H +#include +#include +#include + +#ifndef HAVE_MMPUT_ASYNC +#define mmput_async _kcl_mmput_async +#endif + +#ifdef get_user_pages_remote +#undef get_user_pages_remote +#endif +#ifdef get_user_pages +#undef get_user_pages +#endif + +static inline +long kcl_get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas, int *locked) +{ +#if defined(HAVE_GET_USER_PAGES_REMOTE_REMOVE_TASK_STRUCT) + return get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); +#elif defined(HAVE_GET_USER_PAGES_REMOTE_LOCKED) + return get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, pages, vmas, locked); +#elif defined(HAVE_GET_USER_PAGES_REMOTE_GUP_FLAGS) + return get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, pages, vmas); +#elif defined(HAVE_GET_USER_PAGES_REMOTE_INTRODUCED) + return get_user_pages_remote(tsk, mm, start, nr_pages, !!(gup_flags & FOLL_WRITE), + !!(gup_flags & FOLL_FORCE), pages, vmas); +#elif defined(HAVE_GET_USER_PAGES_REMOTE_REMOVE_VMAS) + return get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, locked); +#else + return get_user_pages(tsk, mm, start, nr_pages, !!(gup_flags & FOLL_WRITE), + !!(gup_flags & FOLL_FORCE), pages, vmas); +#endif +} + +#ifndef HAVE_GET_USER_PAGES_GUP_FLAGS +static inline +long _kcl_get_user_pages(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas) +{ +#if defined(HAVE_GET_USER_PAGES_6ARGS) + return get_user_pages(start, nr_pages, !!(gup_flags & FOLL_WRITE), + !!(gup_flags & FOLL_FORCE), pages, vmas); +#elif defined(HAVE_GET_USER_PAGES_REMOVE_VMAS) + return get_user_pages(start, nr_pages, gup_flags, pages); +#else + return get_user_pages(current, current->mm, start, nr_pages, !!(gup_flags & FOLL_WRITE), + !!(gup_flags & FOLL_FORCE), pages, vmas); +#endif +} +#define get_user_pages _kcl_get_user_pages +#endif /* HAVE_GET_USER_PAGES_GUP_FLAGS */ + +#endif diff --git a/include/kcl/backport/kcl_numa_backport.h b/include/kcl/backport/kcl_numa_backport.h new file mode 100644 index 0000000000000..99251097a9e76 --- /dev/null +++ b/include/kcl/backport/kcl_numa_backport.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#ifndef AMDKCL_NUMA_BACKPORT_H +#define AMDKCL_NUMA_BACKPORT_H + +#if !defined(HAVE_PXM_TO_NODE) +extern int (*_kcl_pxm_to_node)(int pxm); +#define pxm_to_node _kcl_pxm_to_node +#endif + +#endif diff --git a/include/kcl/backport/kcl_pci_backport.h b/include/kcl/backport/kcl_pci_backport.h new file mode 100644 index 0000000000000..f75f4fbd7e7fa --- /dev/null +++ b/include/kcl/backport/kcl_pci_backport.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_PCI_BACKPORT_H +#define AMDKCL_PCI_BACKPORT_H + +#include +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +#define AMDKCL_PCIE_BRIDGE_PM_USABLE +#endif + +#endif diff --git a/include/kcl/backport/kcl_uaccess_backport.h b/include/kcl/backport/kcl_uaccess_backport.h new file 
mode 100644 index 0000000000000..e781a42201f49 --- /dev/null +++ b/include/kcl/backport/kcl_uaccess_backport.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_UACCESS_BACKPORT_H +#define AMDKCL_UACCESS_BACKPORT_H +#include + +static inline int kcl_access_ok(const void __user *addr, unsigned long size) +{ +#if !defined(HAVE_ACCESS_OK_WITH_TWO_ARGUMENTS) + return access_ok(VERIFY_WRITE, (addr), (size)); +#else + return access_ok((addr), (size)); +#endif +} +#endif diff --git a/include/kcl/backport/kcl_workqueue_backport.h b/include/kcl/backport/kcl_workqueue_backport.h new file mode 100644 index 0000000000000..db95877443f53 --- /dev/null +++ b/include/kcl/backport/kcl_workqueue_backport.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef KCL_LINUX_WORKQUEUE_BACKPORT_H +#define KCL_LINUX_WORKQUEUE_BACKPORT_H + +#include + +#ifndef HAVE_CANCEL_WORK +extern bool kcl_cancel_work(struct work_struct *work); +#define cancel_work kcl_cancel_work +#endif + +/* Copied from kernel/workqueue.c and modified for KCL */ +#ifndef HAVE_QUEUE_WORK_NODE +static inline +bool _kcl_queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work) +{ + return queue_work(wq, work); +} +#define queue_work_node _kcl_queue_work_node +#endif +#endif /* KCL_LINUX_WORKQUEUE_BACKPORT_H */ diff --git a/include/kcl/backport/kcl_ww_mutex.h b/include/kcl/backport/kcl_ww_mutex.h new file mode 100644 index 0000000000000..101a5b8aacafa --- /dev/null +++ b/include/kcl/backport/kcl_ww_mutex.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance + * + * Original mutex implementation started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * Wait/Die implementation: + * Copyright (C) 2013 Canonical Ltd. + * Choice of algorithm: + * Copyright (C) 2018 WMWare Inc. + * + * This file contains the main data structure and API definitions. 
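+ *
+ * Usage sketch (illustrative only; "obj" and "ctx" are placeholders): with
+ * the compatibility shim below, callers can use the modern two-argument
+ * trylock form on all kernels, and pre-context kernels simply ignore @ctx:
+ *
+ *   if (ww_mutex_trylock(&obj->lock, ctx))
+ *           ww_mutex_unlock(&obj->lock);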
+ */ +#ifndef __KCL_BACKPORT_KCL_WW_MUTEX_H__ +#define __KCL_BACKPORT_KCL_WW_MUTEX_H__ + +#include + +#ifndef HAVE_WW_MUTEX_TRYLOCK_CONTEXT_ARG +static inline int _kcl_ww_mutex_trylock(struct ww_mutex *lock) +{ + return ww_mutex_trylock(lock); +} +#define ww_mutex_trylock(MUTEX, CTX) _kcl_ww_mutex_trylock(MUTEX) +#endif + +#endif diff --git a/include/kcl/header/asm/set_memory.h b/include/kcl/header/asm/set_memory.h new file mode 100644 index 0000000000000..4614c4c1c4630 --- /dev/null +++ b/include/kcl/header/asm/set_memory.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER__ASM_SET_MEMORY_H_H_ +#define _KCL_HEADER__ASM_SET_MEMORY_H_H_ + +#if defined(HAVE_ASM_SET_MEMORY_H) +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/drm/display/drm_dp.h b/include/kcl/header/drm/display/drm_dp.h new file mode 100644 index 0000000000000..fc1cc1a4bac8e --- /dev/null +++ b/include/kcl/header/drm/display/drm_dp.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DP_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DP_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DP_H) +#include_next +#elif defined(HAVE_DRM_DISPLAY_DRM_DP_HELPER_H) +#include +#elif defined(HAVE_DRM_DP_DRM_DP_HELPER_H) +#include +#else +#include +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_dp_helper.h b/include/kcl/header/drm/display/drm_dp_helper.h new file mode 100644 index 0000000000000..3435bd45d5669 --- /dev/null +++ b/include/kcl/header/drm/display/drm_dp_helper.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DP_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DP_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DP_HELPER_H) +#include_next +#elif defined(HAVE_DRM_DP_DRM_DP_HELPER_H) +#include +#else +#include +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_dp_mst_helper.h b/include/kcl/header/drm/display/drm_dp_mst_helper.h new file mode 100644 index 0000000000000..c667873640a00 --- /dev/null +++ b/include/kcl/header/drm/display/drm_dp_mst_helper.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DP_MST_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DP_MST_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H) +#include_next +#elif defined(HAVE_DRM_DP_DRM_DP_MST_HELPER_H) +#include +#else +#include +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_dsc.h b/include/kcl/header/drm/display/drm_dsc.h new file mode 100644 index 0000000000000..7b4f143d14323 --- /dev/null +++ b/include/kcl/header/drm/display/drm_dsc.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DSC_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DSC_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DSC_H) +#include_next +#else +#include +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_dsc_helper.h b/include/kcl/header/drm/display/drm_dsc_helper.h new file mode 100644 index 0000000000000..162730616ccb2 --- /dev/null +++ b/include/kcl/header/drm/display/drm_dsc_helper.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_DSC_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_DSC_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_DSC_HELPER_H) +#include_next +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_hdcp.h b/include/kcl/header/drm/display/drm_hdcp.h new file mode 100644 index 0000000000000..a3c3aad2a794d --- /dev/null +++ b/include/kcl/header/drm/display/drm_hdcp.h @@ -0,0 
+1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_DISPLAY_HDCP_H_INCLUDED_H_ +#define _KCL_HEADER_DRM_DISPLAY_HDCP_H_INCLUDED_H_ + +#ifdef HAVE_DRM_DISPLAY_DRM_HDCP_H +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/drm/display/drm_hdcp_helper.h b/include/kcl/header/drm/display/drm_hdcp_helper.h new file mode 100644 index 0000000000000..8805018a9a244 --- /dev/null +++ b/include/kcl/header/drm/display/drm_hdcp_helper.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_HDCP_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_HDCP_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_HDCP_HELPER_H) +#include_next +#else +#include +#endif + +#endif + diff --git a/include/kcl/header/drm/display/drm_hdmi_helper.h b/include/kcl/header/drm/display/drm_hdmi_helper.h new file mode 100644 index 0000000000000..da7492d32e946 --- /dev/null +++ b/include/kcl/header/drm/display/drm_hdmi_helper.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DISPLAY_DRM_HDMI_HELPER_H_H_ +#define _KCL_HEADER_DISPLAY_DRM_HDMI_HELPER_H_H_ + +#if defined(HAVE_DRM_DISPLAY_DRM_HDMI_HELPER_H) +#include_next +#endif + +#endif + diff --git a/include/kcl/header/drm/drmP.h b/include/kcl/header/drm/drmP.h new file mode 100644 index 0000000000000..008236685b081 --- /dev/null +++ b/include/kcl/header/drm/drmP.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRMP_H_H_ +#define _KCL_HEADER_DRMP_H_H_ + +#ifdef HAVE_DRM_DRMP_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_aperture.h b/include/kcl/header/drm/drm_aperture.h new file mode 100644 index 0000000000000..9197d9538fc69 --- /dev/null +++ b/include/kcl/header/drm/drm_aperture.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_APERTURE_H_H_ +#define _KCL_HEADER_DRM_APERTURE_H_H_ + +#if defined(HAVE_DRM_DRM_APERTURE_H) +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_eld.h b/include/kcl/header/drm/drm_eld.h new file mode 100644 index 0000000000000..e531edccae0d7 --- /dev/null +++ b/include/kcl/header/drm/drm_eld.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_ELD_H_ +#define _KCL_HEADER_DRM_ELD_H_H_ + +#ifdef HAVE_DRM_DRM_ELD_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_exec.h b/include/kcl/header/drm/drm_exec.h new file mode 100644 index 0000000000000..62aff24d17425 --- /dev/null +++ b/include/kcl/header/drm/drm_exec.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_EXEC_H_H_ +#define _KCL_HEADER_DRM_EXEC_H_H_ + +#ifdef HAVE_DRM_DRM_EXEC_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_fbdev_generic.h b/include/kcl/header/drm/drm_fbdev_generic.h new file mode 100644 index 0000000000000..13b6f65c37f01 --- /dev/null +++ b/include/kcl/header/drm/drm_fbdev_generic.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_DRM_FBDEV_GENERIC_H_H_ +#define _KCL_HEADER_DRM_DRM_FBDEV_GENERIC_H_H_ + +#ifdef HAVE_DRM_DRM_FBDEV_GENERIC_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_fbdev_ttm.h b/include/kcl/header/drm/drm_fbdev_ttm.h new file mode 100644 index 0000000000000..dbf67afb91594 --- /dev/null +++ b/include/kcl/header/drm/drm_fbdev_ttm.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_DRM_FBDEV_TTM_H_H_ +#define _KCL_HEADER_DRM_DRM_FBDEV_TTM_H_H_ + +#ifdef HAVE_DRM_DRM_FBDEV_TTM_H 
+#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/drm/drm_gem_atomic_helper.h b/include/kcl/header/drm/drm_gem_atomic_helper.h new file mode 100644 index 0000000000000..1eb467c2c3327 --- /dev/null +++ b/include/kcl/header/drm/drm_gem_atomic_helper.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_GEM_ATOMIC_HELPER_PREPARE_H_H_ +#define _KCL_HEADER_DRM_GEM_ATOMIC_HELPER_PREPARE_H_H_ + +#if defined(HAVE_DRM_DRM_GEM_ATOMIC_HELPER_H) +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_managed.h b/include/kcl/header/drm/drm_managed.h new file mode 100644 index 0000000000000..d6f211d64b346 --- /dev/null +++ b/include/kcl/header/drm/drm_managed.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_KCL_DRM_MANAGED_H_H +#define _KCL_HEADER_KCL_DRM_MANAGED_H_H + +#ifdef HAVE_DRM_DRM_MANAGED_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/drm/drm_print.h b/include/kcl/header/drm/drm_print.h new file mode 100644 index 0000000000000..a6734c48c8eb5 --- /dev/null +++ b/include/kcl/header/drm/drm_print.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_PRINT_H_H_ +#define _KCL_HEADER_DRM_PRINT_H_H_ + +#include_next +#include + +#endif diff --git a/include/kcl/header/drm/drm_probe_helper.h b/include/kcl/header/drm/drm_probe_helper.h new file mode 100644 index 0000000000000..a454fe92ea203 --- /dev/null +++ b/include/kcl/header/drm/drm_probe_helper.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_PROBE_HELPER_H_H_ +#define _KCL_HEADER_DRM_PROBE_HELPER_H_H_ + +#ifdef HAVE_DRM_DRM_PROBE_HELPER_H +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/drm/drm_suballoc.h b/include/kcl/header/drm/drm_suballoc.h new file mode 100644 index 0000000000000..3eca4a8774ac4 --- /dev/null +++ b/include/kcl/header/drm/drm_suballoc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_SUBALLOC_H_H_ +#define _KCL_HEADER_DRM_SUBALLOC_H_H_ + +#ifdef HAVE_DRM_DRM_SUBALLOC_H +#include_next +#endif + +#endif + diff --git a/include/kcl/header/drm/task_barrier.h b/include/kcl/header/drm/task_barrier.h new file mode 100644 index 0000000000000..e93315f493f3e --- /dev/null +++ b/include/kcl/header/drm/task_barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_DRM_TASK_BARRIER_H_H_ +#define _KCL_HEADER_DRM_TASK_BARRIER_H_H_ + +#ifdef HAVE_DRM_TASK_BARRIER_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/linux/acpi_amd_wbrf.h b/include/kcl/header/linux/acpi_amd_wbrf.h new file mode 100644 index 0000000000000..ecf5be29494d4 --- /dev/null +++ b/include/kcl/header/linux/acpi_amd_wbrf.h @@ -0,0 +1,9 @@ +#ifndef _KCL_HEADER___ACPI_AMD_WBRF_H___H_ +#define _KCL_HEADER___ACPI_AMD_WBRF_H___H_ + +#ifdef HAVE_LINUX_ACPI_AMD_WBRF_H +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/apple-gmux.h b/include/kcl/header/linux/apple-gmux.h new file mode 100644 index 0000000000000..19c858a0be836 --- /dev/null +++ b/include/kcl/header/linux/apple-gmux.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_APPLE_GMUX_H_H +#define _KCL_HEADER_LINUX_APPLE_GMUX_H_H + +#ifdef HAVE_LINUX_APPLE_GMUX_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/linux/bits.h b/include/kcl/header/linux/bits.h new file mode 100644 index 0000000000000..28a84955dc780 --- /dev/null +++ b/include/kcl/header/linux/bits.h @@ -0,0 +1,11 @@ +/* 
SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER__LINUX_BITS_H_H_ +#define _KCL_HEADER__LINUX_BITS_H_H_ + +#if defined(HAVE_LINUX_BITS_H) +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/build_bug.h b/include/kcl/header/linux/build_bug.h new file mode 100644 index 0000000000000..d97f9812224e1 --- /dev/null +++ b/include/kcl/header/linux/build_bug.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_BUG_BUILD_H_H +#define _KCL_HEADER_LINUX_BUG_BUILD_H_H + +#ifdef HAVE_LINUX_BUILD_BUG_H +#include_next +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/header/linux/cc_platform.h b/include/kcl/header/linux/cc_platform.h new file mode 100644 index 0000000000000..cea7cc0c28876 --- /dev/null +++ b/include/kcl/header/linux/cc_platform.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_CC_PLATFORM_H_H +#define _KCL_HEADER_LINUX_CC_PLATFORM_H_H + +#if defined(HAVE_LINUX_CC_PLATFORM_H) +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/class.h b/include/kcl/header/linux/class.h new file mode 100644 index 0000000000000..595b34ca30dbe --- /dev/null +++ b/include/kcl/header/linux/class.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_CLASS_H_H_ +#define _KCL_HEADER_LINUX_CLASS_H_H_ + +#ifdef HAVE_LINUX_DEVICE_CLASS_H +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/container_of.h b/include/kcl/header/linux/container_of.h new file mode 100644 index 0000000000000..cf1f8a85f216f --- /dev/null +++ b/include/kcl/header/linux/container_of.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_CONTAINER_OF_H_H +#define _KCL_HEADER_LINUX_CONTAINER_OF_H_H + +#if defined(HAVE_LINUX_CONTAINER_OF_H) +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/dma-buf-map.h b/include/kcl/header/linux/dma-buf-map.h new file mode 100644 index 0000000000000..523dfcfabda8b --- /dev/null +++ b/include/kcl/header/linux/dma-buf-map.h @@ -0,0 +1,9 @@ +#ifndef _KCL_HEADER___DMA_BUF_MAP_H___H_ +#define _KCL_HEADER___DMA_BUF_MAP_H___H_ + +#ifdef HAVE_LINUX_DMA_BUF_MAP_H +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/dma-fence-array.h b/include/kcl/header/linux/dma-fence-array.h new file mode 100644 index 0000000000000..bc3d2e4bbaca2 --- /dev/null +++ b/include/kcl/header/linux/dma-fence-array.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER__LINUX_DMA_FENCE_ARRAY_H_H_ +#define _KCL_HEADER__LINUX_DMA_FENCE_ARRAY_H_H_ + +#if !defined(HAVE_LINUX_FENCE_ARRAY_H) +#include_next +#else +#include_next +#endif + +#endif diff --git a/include/kcl/header/linux/dma-fence-chain.h b/include/kcl/header/linux/dma-fence-chain.h new file mode 100644 index 0000000000000..ff429da204f75 --- /dev/null +++ b/include/kcl/header/linux/dma-fence-chain.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER__LINUX_DMA_FENCE_CHAIN_H_H_ +#define _KCL_HEADER__LINUX_DMA_FENCE_CHAIN_H_H_ + +#if defined(HAVE_LINUX_DMA_FENCE_CHAIN_H) +#include_next +#endif + +#endif diff --git a/include/kcl/header/linux/dma-map-ops.h b/include/kcl/header/linux/dma-map-ops.h new file mode 100644 index 0000000000000..0bda5e05b7eb1 --- /dev/null +++ b/include/kcl/header/linux/dma-map-ops.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER__LINUX_DMA_MAP_OPS_H_H_ +#define _KCL_HEADER__LINUX_DMA_MAP_OPS_H_H_ + +#if defined(HAVE_LINUX_DMA_MAP_OPS_H) 
+#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/io-64-nonatomic-lo-hi.h b/include/kcl/header/linux/io-64-nonatomic-lo-hi.h new file mode 100644 index 0000000000000..0fa2e108091b7 --- /dev/null +++ b/include/kcl/header/linux/io-64-nonatomic-lo-hi.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_IO_64_NONATOMIC_LO_HI_H_H_ +#define _KCL_HEADER_LINUX_IO_64_NONATOMIC_LO_HI_H_H_ + +#ifdef HAVE_LINUX_IO_64_NONATOMIC_LO_HI_H +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/iosys-map.h b/include/kcl/header/linux/iosys-map.h new file mode 100644 index 0000000000000..9ce52ad756e1d --- /dev/null +++ b/include/kcl/header/linux/iosys-map.h @@ -0,0 +1,10 @@ +#ifndef _KCL_HEADER___IOSYS_MAP_H___H_ +#define _KCL_HEADER___IOSYS_MAP_H___H_ + +#ifdef HAVE_LINUX_IOSYS_MAP_H +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/pci-p2pdma.h b/include/kcl/header/linux/pci-p2pdma.h new file mode 100644 index 0000000000000..84ad226012bdc --- /dev/null +++ b/include/kcl/header/linux/pci-p2pdma.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_PCI_P2PDMA_H_H_ +#define _KCL_HEADER_LINUX_PCI_P2PDMA_H_H_ + +#ifdef HAVE_LINUX_PCI_P2PDMA_H +#include_next +#endif + +#endif diff --git a/include/kcl/header/linux/pgtable.h b/include/kcl/header/linux/pgtable.h new file mode 100644 index 0000000000000..27198a089c730 --- /dev/null +++ b/include/kcl/header/linux/pgtable.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_PGTABLE_H_H_ +#define _KCL_HEADER_LINUX_PGTABLE_H_H_ + +#ifdef HAVE_LINUX_PGTABLE_H +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/processor.h b/include/kcl/header/linux/processor.h new file mode 100644 index 0000000000000..873ab8368cb12 --- /dev/null +++ b/include/kcl/header/linux/processor.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_PROCESSOR_H_H +#define _KCL_HEADER_LINUX_PROCESSOR_H_H + +#if defined(HAVE_LINUX_PROCESSOR_H) +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/stdarg.h b/include/kcl/header/linux/stdarg.h new file mode 100644 index 0000000000000..c7564aec2d86d --- /dev/null +++ b/include/kcl/header/linux/stdarg.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _KCL_HEADER_LINUX_STDARG_H_H +#define _KCL_HEADER_LINUX_STDARG_H_H + +#if defined(HAVE_LINUX_STDARG_H) +#include_next +#else +#include +#endif + +#endif diff --git a/include/kcl/header/linux/units.h b/include/kcl/header/linux/units.h new file mode 100644 index 0000000000000..228273e685fc1 --- /dev/null +++ b/include/kcl/header/linux/units.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _KCL_HEADER_LINUX_UNITS_H_H +#define _KCL_HEADER_LINUX_UNITS_H_H + +#ifdef HAVE_LINUX_UNITS_H +#include_next +#endif + +#endif + diff --git a/include/kcl/header/linux/xarray.h b/include/kcl/header/linux/xarray.h new file mode 100644 index 0000000000000..80d73c2ed9065 --- /dev/null +++ b/include/kcl/header/linux/xarray.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _KCL_HEADER_LINUX_XARRAY_H_H +#define _KCL_HEADER_LINUX_XARRAY_H_H + +#ifdef HAVE_LINUX_XARRAY_H +#include_next +#endif + +#endif diff --git a/include/kcl/kcl_acpi.h b/include/kcl/kcl_acpi.h new file mode 100644 index 0000000000000..d6f499640f0b8 --- /dev/null +++ b/include/kcl/kcl_acpi.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: 
GPL-2.0-or-later */ +/* + * acpi.h - ACPI Interface + * + * Copyright (C) 2001 Paul Diefenbaugh + */ +#ifndef AMDKCL_ACPI_H +#define AMDKCL_ACPI_H + +/** + * interface change in mainline kernel 3.13 + * but only affect RHEL6 without backport + * v3.7-rc5-12-g95f8a082b9b1 ACPI / driver core: Introduce struct acpi_dev_node + * and related macros + * v3.12-8048-g7b1998116bbb ACPI / driver core: Store an ACPI device pointer in + * struct acpi_dev_node + */ + +#include + +/* Copied from include/linux/acpi.h> */ +#ifndef ACPI_HANDLE +#define ACPI_HANDLE(dev) DEVICE_ACPI_HANDLE(dev) +#endif + +#endif /* AMDKCL_ACPI_H */ diff --git a/include/kcl/kcl_acpi_amd_wbrf.h b/include/kcl/kcl_acpi_amd_wbrf.h new file mode 100644 index 0000000000000..b8178e740f171 --- /dev/null +++ b/include/kcl/kcl_acpi_amd_wbrf.h @@ -0,0 +1,94 @@ +/*Copy from include/linux/acpi_amd_wbrf.h*/ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Wifi Band Exclusion Interface (AMD ACPI Implementation) + * Copyright (C) 2023 Advanced Micro Devices + */ + +#ifndef _KCL_ACPI_AMD_WBRF_H +#define _KCL_ACPI_AMD_WBRF_H + +#ifndef HAVE_LINUX_ACPI_AMD_WBRF_H +#include +#include + +/* The maximum number of frequency band ranges */ +#define MAX_NUM_OF_WBRF_RANGES 11 + +/* Record actions */ +#define WBRF_RECORD_ADD 0x0 +#define WBRF_RECORD_REMOVE 0x1 + +/** + * struct freq_band_range - Wifi frequency band range definition + * @start: start frequency point (in Hz) + * @end: end frequency point (in Hz) + */ +struct freq_band_range { + u64 start; + u64 end; +}; + +/** + * struct wbrf_ranges_in_out - wbrf ranges info + * @num_of_ranges: total number of band ranges in this struct + * @band_list: array of Wifi band ranges + */ +struct wbrf_ranges_in_out { + u64 num_of_ranges; + struct freq_band_range band_list[MAX_NUM_OF_WBRF_RANGES]; +}; + +/** + * enum wbrf_notifier_actions - wbrf notifier actions index + * @WBRF_CHANGED: there was some frequency band updates. The consumers + * should retrieve the latest active frequency bands. 
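+ *
+ * Consumer-side sketch (illustrative only; "my_nb" is a placeholder
+ * notifier_block whose callback handles WBRF_CHANGED):
+ *
+ *   struct wbrf_ranges_in_out out = {};
+ *
+ *   if (acpi_amd_wbrf_supported_consumer(dev)) {
+ *           amd_wbrf_register_notifier(&my_nb);
+ *           amd_wbrf_retrieve_freq_band(dev, &out);
+ *   }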
+ */ +enum wbrf_notifier_actions { + WBRF_CHANGED, +}; + +#if IS_ENABLED(CONFIG_AMD_WBRF) +bool acpi_amd_wbrf_supported_producer(struct device *dev); +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in); +bool acpi_amd_wbrf_supported_consumer(struct device *dev); +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out); +int amd_wbrf_register_notifier(struct notifier_block *nb); +int amd_wbrf_unregister_notifier(struct notifier_block *nb); +#else +static inline +bool acpi_amd_wbrf_supported_consumer(struct device *dev) +{ + return false; +} + +static inline +int acpi_amd_wbrf_add_remove(struct device *dev, uint8_t action, struct wbrf_ranges_in_out *in) +{ + return -ENODEV; +} + +static inline +bool acpi_amd_wbrf_supported_producer(struct device *dev) +{ + return false; +} +static inline +int amd_wbrf_retrieve_freq_band(struct device *dev, struct wbrf_ranges_in_out *out) +{ + return -ENODEV; +} +static inline +int amd_wbrf_register_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +static inline +int amd_wbrf_unregister_notifier(struct notifier_block *nb) +{ + return -ENODEV; +} +#endif /* CONFIG_AMD_WBRF */ + +#endif /* HAVE_LINUX_ACPI_AMD_WBRF */ +#endif /* _KCL_ACPI_AMD_WBRF_H */ diff --git a/include/kcl/kcl_acpi_table.h b/include/kcl/kcl_acpi_table.h new file mode 100644 index 0000000000000..849e8a58a2dbd --- /dev/null +++ b/include/kcl/kcl_acpi_table.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/****************************************************************************** + * + * Name: acpixf.h - External interfaces to the ACPI subsystem + * + * Copyright (C) 2000 - 2020, Intel Corp. + * + *****************************************************************************/ +#ifndef KCL_KCL_ACPI_TABLE_H +#define KCL_KCL_ACPI_TABLE_H + +#include + +#ifndef HAVE_ACPI_PUT_TABLE +void acpi_put_table(struct acpi_table_header *table); +#endif + +#endif diff --git a/include/kcl/kcl_apple-gmux.h b/include/kcl/kcl_apple-gmux.h new file mode 100644 index 0000000000000..4e478cb3a1e87 --- /dev/null +++ b/include/kcl/kcl_apple-gmux.h @@ -0,0 +1,22 @@ +#ifndef AMDKCL_APPLE_GMUX_H +#define AMDKCL_APPLE_GMUX_H + +#include +#include + +#ifndef HAVE_APPLE_GMUX_DETECT +#if IS_ENABLED(CONFIG_APPLE_GMUX) +static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret) +{ + pr_warn_once("legacy kernel without apple_gmux_detect()\n"); + return false; +} +#else +static inline bool apple_gmux_detect(struct pnp_dev *pnp_dev, bool *indexed_ret) +{ + return false; +} +#endif +#endif + +#endif /* AMDKCL_APPLE_GMUX_H */ diff --git a/include/kcl/kcl_backlight.h b/include/kcl/kcl_backlight.h new file mode 100644 index 0000000000000..1d06b61502c3c --- /dev/null +++ b/include/kcl/kcl_backlight.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Backlight Lowlevel Control Abstraction + * + * Copyright (C) 2003,2004 Hewlett-Packard Company + * + */ +#ifndef AMDKCL_BACKLIGHT_H +#define AMDKCL_BACKLIGHT_H + +#include +#ifndef HAVE_BACKLIGHT_DEVICE_SET_BRIGHTNESS +int backlight_device_set_brightness(struct backlight_device *bd, + unsigned long brightness); +#endif /* HAVE_BACKLIGHT_DEVICE_SET_BRIGHTNESS */ +#endif diff --git a/include/kcl/kcl_bitmap.h b/include/kcl/kcl_bitmap.h new file mode 100644 index 0000000000000..f2c0863b7b7d8 --- /dev/null +++ b/include/kcl/kcl_bitmap.h @@ -0,0 +1,54 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of 
this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KCL_BITMAP_H +#define KCL_BITMAP_H + +#ifndef HAVE_BITMAP_FUNCS +/* Copied from include/linux/bitmap.h */ + +/* + * v4.17-3-gc42b65e363ce + * bitmap: Add bitmap_alloc(), bitmap_zalloc() and bitmap_free() + */ + +/* + * Allocation and deallocation of bitmap. + * Provided in lib/bitmap.c to avoid circular dependency. + */ +unsigned long *kcl_bitmap_alloc(unsigned int nbits, gfp_t flags); +unsigned long *kcl_bitmap_zalloc(unsigned int nbits, gfp_t flags); +void kcl_bitmap_free(const unsigned long *bitmap); +#endif /* HAVE_BITMAP_FUNCS */ + +/* Copied from bitmap.h */ +#ifndef HAVE_BITMAP_TO_ARR32 +#if BITS_PER_LONG == 64 +void kcl_bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, + unsigned int nbits); +#else +#define kcl_bitmap_to_arr32(buf, bitmap, nbits) \ + bitmap_copy_clear_tail((unsigned long *) (buf), \ + (const unsigned long *) (bitmap), (nbits)) +#endif +#endif /* HAVE_BITMAP_TO_ARR32 */ + +#endif /* KCL_BITMAP_H */ diff --git a/include/kcl/kcl_bitops.h b/include/kcl/kcl_bitops.h new file mode 100644 index 0000000000000..f022f59a6a772 --- /dev/null +++ b/include/kcl/kcl_bitops.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_BITOPS_BACKPORT_H +#define AMDKCL_BITOPS_BACKPORT_H + +#include +/* Copied from include/linux/bitops.h */ +#ifndef BITS_PER_TYPE +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#endif + +#endif diff --git a/include/kcl/kcl_build_bug.h b/include/kcl/kcl_build_bug.h new file mode 100644 index 0000000000000..7abac2512a33d --- /dev/null +++ b/include/kcl/kcl_build_bug.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_LINUX_BUILD_BUG_H +#define AMDKCL_LINUX_BUILD_BUG_H + +#include + +#ifndef static_assert +#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) +#define __static_assert(expr, msg, ...)
_Static_assert(expr, msg) +#endif + +#endif /* AMDKCL_LINUX_BUILD_BUG_H */ \ No newline at end of file diff --git a/include/kcl/kcl_capability.h b/include/kcl/kcl_capability.h new file mode 100644 index 0000000000000..52448ad625f96 --- /dev/null +++ b/include/kcl/kcl_capability.h @@ -0,0 +1,35 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _KCL_KCL_CAPABILITY_H +#define _KCL_KCL_CAPABILITY_H + +#include + +#ifndef CAP_CHECKPOINT_RESTORE +#define CAP_CHECKPOINT_RESTORE CAP_SYS_ADMIN +#endif + +#ifndef CAP_PERFMON +#define CAP_PERFMON 38 +#endif + +#endif diff --git a/include/kcl/kcl_cc_platform.h b/include/kcl/kcl_cc_platform.h new file mode 100644 index 0000000000000..8a2d455442e4f --- /dev/null +++ b/include/kcl/kcl_cc_platform.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ +#ifndef AMDKCL_CC_PLATFORM_H +#define AMDKCL_CC_PLATFORM_H + +#ifndef HAVE_LINUX_CC_PLATFORM_H +/** + * enum cc_attr - Confidential computing attributes + * + * These attributes represent confidential computing features that are + * currently active. + */ +enum cc_attr { + /** + * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active + * + * The platform/OS is running with active memory encryption. This + * includes running either as a bare-metal system or a hypervisor + * and actively using memory encryption or as a guest/virtual machine + * and actively using memory encryption. + * + * Examples include SME, SEV and SEV-ES. + */ + CC_ATTR_MEM_ENCRYPT, + + /** + * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active + * + * The platform/OS is running as a bare-metal system or a hypervisor + * and actively using memory encryption. + * + * Examples include SME. + */ + CC_ATTR_HOST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption. + * + * Examples include SEV and SEV-ES. + */ + CC_ATTR_GUEST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption and register state encryption. + * + * Examples include SEV-ES. 
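+ *
+ * Query sketch (illustrative only; sev_guest_setup() is a placeholder):
+ * on kernels without linux/cc_platform.h the stub below always returns
+ * false, so guarded paths are skipped:
+ *
+ *   if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ *           sev_guest_setup();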
+ */ + CC_ATTR_GUEST_STATE_ENCRYPT, +}; + +static inline bool cc_platform_has(enum cc_attr attr) { return false; } + +#endif /* HAVE_LINUX_CC_PLATFORM_H */ +#endif diff --git a/include/kcl/kcl_class.h b/include/kcl/kcl_class.h new file mode 100644 index 0000000000000..fbce818309960 --- /dev/null +++ b/include/kcl/kcl_class.h @@ -0,0 +1,17 @@ +#ifndef __AMDKCL_CLASS_H__ +#define __AMDKCL_CLASS_H__ + +#ifdef HAVE_LINUX_DEVICE_CLASS_H +#include +#endif +#include +static inline struct class *kcl_class_create(struct module *owner, const char *name) +{ +#ifdef HAVE_ONE_ARGUMENT_OF_CLASS_CREATE + return class_create(name); +#else + return class_create(owner, name); +#endif +} +#endif + diff --git a/include/kcl/kcl_compat.h b/include/kcl/kcl_compat.h new file mode 100644 index 0000000000000..80bcd236bd4de --- /dev/null +++ b/include/kcl/kcl_compat.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_COMPAT_H +#define AMDKCL_COMPAT_H + +#include + +#if !defined(HAVE_IN_COMPAT_SYSCALL) +#ifdef CONFIG_COMPAT +static inline bool in_compat_syscall(void) { return is_compat_task(); } +#else +static inline bool in_compat_syscall(void) { return false; } +#endif +#endif + +#endif /* AMDKCL_COMPAT_H */ diff --git a/include/kcl/kcl_compiler_attributes.h b/include/kcl/kcl_compiler_attributes.h new file mode 100644 index 0000000000000..e844e68139479 --- /dev/null +++ b/include/kcl/kcl_compiler_attributes.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_COMPILER_ATTRIBUTES_H +#define AMDKCL_COMPILER_ATTRIBUTES_H + +#ifdef HAVE_LINUX_COMPILER_ATTRIBUTES_H +#include +#endif + +#ifndef fallthrough +#define fallthrough do {} while (0) /* fallthrough */ +#endif + +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#ifndef __counted_by +#if __has_attribute(__counted_by__) +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif +#endif + +#endif /* AMDKCL_COMPILER_ATTRIBUTES_H */ diff --git a/include/kcl/kcl_cpumask.h b/include/kcl/kcl_cpumask.h new file mode 100644 index 0000000000000..aee779d6ec5f2 --- /dev/null +++ b/include/kcl/kcl_cpumask.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include + +#ifndef for_each_cpu_wrap + +extern int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap); + +static inline +int cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap) +{ + return _kcl_cpumask_next_wrap(n, mask, start, wrap); +} + +/* Copied from include/linux/cpumask.h */ +#if NR_CPUS == 1 +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) +#else +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids.
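+ *
+ * Usage sketch (illustrative only; try_to_queue_on() is a placeholder):
+ * scan every CPU in @mask, starting at a preferred CPU and wrapping:
+ *
+ *   int cpu;
+ *
+ *   for_each_cpu_wrap(cpu, mask, preferred)
+ *           if (try_to_queue_on(cpu))
+ *                   break;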
+ */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + +#endif +#endif + diff --git a/include/kcl/kcl_debugfs.h b/include/kcl/kcl_debugfs.h new file mode 100644 index 0000000000000..ca6a8d391da78 --- /dev/null +++ b/include/kcl/kcl_debugfs.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * debugfs.h - a tiny little debug file system + * + * Copyright (C) 2004 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Inc. + * + * debugfs is for people to use instead of /proc or /sys. + * See Documentation/filesystems/ for more details. + */ + +#ifndef KCL_DEBUGFS_H_ +#define KCL_DEBUGFS_H_ + +#include +#include +#include + +#include +#include + +#if defined(DEFINE_DEBUGFS_ATTRIBUTE) && !defined(DEFINE_DEBUGFS_ATTRIBUTE_SIGNED) +#define KCL_FAKE_DEBUGFS_ATTRIBUTE_SIGNED +#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ + .llseek = no_llseek, \ +} + +#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ + DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) + +#if defined(CONFIG_DEBUG_FS) +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); +#else +static inline ssize_t debugfs_attr_write_signed(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} +#endif /* CONFIG_DEBUG_FS */ + +#endif /* DEFINE_DEBUGFS_ATTRIBUTE_SIGNED */ + +#endif diff --git a/include/kcl/kcl_debugfs_inode.h b/include/kcl/kcl_debugfs_inode.h new file mode 100644 index 0000000000000..a21af633d09d6 --- /dev/null +++ b/include/kcl/kcl_debugfs_inode.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * debugfs.h - a tiny little debug file system + * + * Copyright (C) 2004 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Inc. + * + * debugfs is for people to use instead of /proc or /sys. + * See Documentation/filesystems/ for more details. 
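+ *
+ * Usage sketch for the fallback declared below (illustrative only;
+ * "parent", "data" and "my_fops" are placeholders):
+ *
+ *   debugfs_create_file_size("regs", 0444, parent, data,
+ *                            &my_fops, 0x1000);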
+ */ +#include +#include + +#ifndef HAVE_DEBUGFS_CREATE_FILE_SIZE +#ifdef CONFIG_DEBUG_FS +void debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size); +#else +static inline void debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size) +{ } +#endif +#endif diff --git a/include/kcl/kcl_delay.h b/include/kcl/kcl_delay.h new file mode 100644 index 0000000000000..f5f2962c6bb6d --- /dev/null +++ b/include/kcl/kcl_delay.h @@ -0,0 +1,18 @@ +#ifndef AMDKCL_DELAY_H +#define AMDKCL_DELAY_H + +#ifndef HAVE_FSLEEP +static inline void _kcl_fsleep(unsigned long usecs) +{ + if (usecs <= 10) + udelay(usecs); + else if (usecs <= 20000) + usleep_range(usecs, 2 * usecs); + else + msleep(DIV_ROUND_UP(usecs, 1000)); +} + +#define fsleep _kcl_fsleep + +#endif +#endif diff --git a/include/kcl/kcl_device.h b/include/kcl/kcl_device.h new file mode 100644 index 0000000000000..a6480630d0ab2 --- /dev/null +++ b/include/kcl/kcl_device.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for the NVM Express interface + * Copyright (c) 2011-2014, Intel Corporation. + */ +#ifndef AMDKCL_DEVICE_H +#define AMDKCL_DEVICE_H + +#include +#include + +/* Copied from include/linux/dev_printk.h */ +#if !defined(dev_err_once) +#ifdef CONFIG_PRINTK +#define dev_level_once(dev_level, dev, fmt, ...) \ +do { \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + dev_level(dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) +#else +#define dev_level_once(dev_level, dev, fmt, ...) \ +do { \ + if (0) \ + dev_level(dev, fmt, ##__VA_ARGS__); \ +} while (0) +#endif + +#define dev_err_once(dev, fmt, ...) \ + dev_level_once(dev_err, dev, fmt, ##__VA_ARGS__) +#endif + +#if !defined(dev_err_ratelimited) +#define dev_level_ratelimited(dev_level, dev, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + dev_level(dev, fmt, ##__VA_ARGS__); \ +} while (0) + +#define dev_err_ratelimited(dev, fmt, ...) 
\ + dev_level_ratelimited(dev_err, dev, fmt, ##__VA_ARGS__) +#endif + +#if !defined(HAVE_DEV_PM_SET_DRIVER_FLAGS) +/* rhel7.7 wraps the macro dev_pm_set_driver_flags in drm/drm_backport.h */ +#ifdef dev_pm_set_driver_flags +#undef dev_pm_set_driver_flags +#endif +#define DPM_FLAG_NEVER_SKIP BIT(0) +#define DPM_FLAG_SMART_PREPARE BIT(1) +static inline void dev_pm_set_driver_flags(struct device *dev, u32 flags) +{ + pr_warn_once("%s is not available\n", __func__); +} +#endif + +#ifndef HAVE_DEV_IS_REMOVABLE +static inline bool _kcl_dev_is_removable(struct device *dev) +{ + return false; +} +#define dev_is_removable _kcl_dev_is_removable +#endif + +#endif /* AMDKCL_DEVICE_H */ diff --git a/include/kcl/kcl_device_cgroup.h b/include/kcl/kcl_device_cgroup.h new file mode 100644 index 0000000000000..3eba9b4697856 --- /dev/null +++ b/include/kcl/kcl_device_cgroup.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_DEVICE_CGROUP_H +#define AMDKCL_DEVICE_CGROUP_H + +#include + +/* Copied from include/linux/device_cgroup.h */ +#ifndef DEVCG_DEV_CHAR +#define DEVCG_DEV_CHAR 2 +#endif +#ifndef DEVCG_ACC_READ +#define DEVCG_ACC_READ 2 +#endif +#ifndef DEVCG_ACC_WRITE +#define DEVCG_ACC_WRITE 4 +#endif + +/* Copied from security/device_cgroup.c and modified for KCL */ +#ifndef HAVE_DEVCGROUP_CHECK_PERMISSION +#if defined(CONFIG_CGROUP_DEVICE) +extern int (*__kcl_devcgroup_check_permission)(short type, u32 major, u32 minor, + short access); + +static inline int _kcl_devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ +#ifdef BPF_CGROUP_RUN_PROG_DEVICE_CGROUP + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; +#endif + + return __kcl_devcgroup_check_permission(type, major, minor, access); +} +#else +static inline int _kcl_devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ + return 0; +} +#endif /* CONFIG_CGROUP_DEVICE */ +#endif /* HAVE_DEVCGROUP_CHECK_PERMISSION */ + +#endif /* AMDKCL_DEVICE_CGROUP_H */ diff --git a/include/kcl/kcl_dma-buf-map.h b/include/kcl/kcl_dma-buf-map.h new file mode 100644 index 0000000000000..c3112da74c090 --- /dev/null +++ b/include/kcl/kcl_dma-buf-map.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer to dma-buf-mapped memory, plus helpers. + * Copied from include/linux/dma-buf-map.h + */ + +#ifndef _KCL_KCL__DMA_BUF_MAP_H__H__ +#define _KCL_KCL__DMA_BUF_MAP_H__H__ + +#ifndef HAVE_LINUX_IOSYS_MAP_H +#include +#endif + +#ifndef HAVE_LINUX_DMA_BUF_MAP_H +#include + +/** + * struct dma_buf_map - Pointer to vmap'ed dma-buf memory. + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the dma-buf memory is located in I/O + * memory, or false otherwise. + */ +struct dma_buf_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/** + * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory + * @vaddr: A system-memory address + */ +#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + +/** + * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory + * @map: The dma-buf mapping structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag.
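+ *
+ * Example (illustrative only; "vaddr" is a placeholder):
+ *
+ *   struct dma_buf_map map;
+ *
+ *   dma_buf_map_set_vaddr(&map, vaddr);
+ *
+ * Afterwards map.is_iomem is false and map.vaddr equals vaddr.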
+ */ +static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + +/** + * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory + * @map: The dma-buf mapping structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + + +/** + * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality + * @lhs: The dma-buf mapping structure + * @rhs: A dma-buf mapping structure to compare with + * + * Two dma-buf mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True if both structures are equal, or false otherwise. + */ +static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs, + const struct dma_buf_map *rhs) +{ + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; +} + +/** + * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool dma_buf_map_is_null(const struct dma_buf_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * dma_buf_map_is_set - Tests if the dma-buf mapping has been set + * @map: The dma-buf mapping structure + * + * Depending on the state of struct dma_buf_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping has been set, or false otherwise. + */ +static inline bool dma_buf_map_is_set(const struct dma_buf_map *map) +{ + return !dma_buf_map_is_null(map); +} + +/** + * dma_buf_map_clear - Clears a dma-buf mapping structure + * @map: The dma-buf mapping structure + * + * Clears all fields to zero; including struct dma_buf_map.is_iomem. So + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void dma_buf_map_clear(struct dma_buf_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +/** + * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping + * @dst: The dma-buf mapping structure + * @src: The source buffer + * @len: The number of bytes in src + * + * Copies data into a dma-buf mapping. The source buffer is in system + * memory. Depending on the buffer's location, the helper picks the correct + * method of accessing the memory. + */ +static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem, src, len); + else + memcpy(dst->vaddr, src, len); +} + +/** + * dma_buf_map_incr - Increments the address stored in a dma-buf mapping + * @map: The dma-buf mapping structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a dma-buf mapping. Depending on the + * buffer's location, the correct value will be updated.
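+ *
+ * Example (illustrative only; hdr_len, payload and payload_len are
+ * placeholders): skip a header before copying the payload:
+ *
+ *   dma_buf_map_incr(&map, hdr_len);
+ *   dma_buf_map_memcpy_to(&map, payload, payload_len);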
+ */ +static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + +#endif /* HAVE_LINUX_DMA_BUF_MAP_H */ + +#endif diff --git a/include/kcl/kcl_dma-buf.h b/include/kcl/kcl_dma-buf.h new file mode 100644 index 0000000000000..fe7094bc3071e --- /dev/null +++ b/include/kcl/kcl_dma-buf.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer to dma-buf-mapped memory, plus helpers. + * Copied from include/kcl/dma-buf.h + */ +#ifndef _KCL_KCL__DMA_BUF_H__H__ +#define _KCL_KCL__DMA_BUF_H__H__ + +#include + +#ifndef HAVE_DMA_BUF_IS_DYNAMIC +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) +{ + return false; +} +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_dma-resv.h b/include/kcl/kcl_dma-resv.h new file mode 100644 index 0000000000000..4c2b2576374ed --- /dev/null +++ b/include/kcl/kcl_dma-resv.h @@ -0,0 +1,246 @@ +/* + * Header file for reservations for dma-buf and ttm + * + * Copyright(C) 2011 Linaro Limited. All rights reserved. + * Copyright (C) 2012-2013 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * Thomas Hellstrom + * + * Based on bo.c which bears the following copyright notice, + * but is dual licensed: + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * NOTICE: + * THIS HEADER IS FOR DMA-RESV.H ONLY + * DO NOT INCLUDE THIS HEADER ANY OTHER PLACE + * INCLUDE LINUX/DMA-RESV.H OR LINUX/RESERVATION.H INSTEAD + */ +#ifndef KCL_KCL_DMA_RESV_H +#define KCL_KCL_DMA_RESV_H + +#include +#include +#include +#include +#include + +struct dma_resv_list; + +enum dma_resv_usage { + /** + * @DMA_RESV_USAGE_KERNEL: For in kernel memory management only. + * + * This should only be used for things like copying or clearing memory + * with a DMA hardware engine for the purpose of kernel memory + * management. + * + * Drivers *always* must wait for those fences before accessing the + * resource protected by the dma_resv object. The only exception for + * that is when the resource is known to be locked down in place by + * pinning it previously. + */ + DMA_RESV_USAGE_KERNEL, + + /** + * @DMA_RESV_USAGE_WRITE: Implicit write synchronization. 
+ * + * This should only be used for userspace command submissions which add + * an implicit write dependency. + */ + DMA_RESV_USAGE_WRITE, + + /** + * @DMA_RESV_USAGE_READ: Implicit read synchronization. + * + * This should only be used for userspace command submissions which add + * an implicit read dependency. + */ + DMA_RESV_USAGE_READ, + + /** + * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync. + * + * This should be used by submissions which don't want to participate in + * implicit synchronization. + * + * The most common case are preemption fences as well as page table + * updates and their TLB flushes. + */ + DMA_RESV_USAGE_BOOKKEEP +}; + +#if defined(HAVE_DMA_RESV_FENCES) +struct dma_resv { + struct ww_mutex lock; + struct dma_resv_list __rcu *fences; +}; + +struct dma_resv_iter { + /** @obj: The dma_resv object we iterate over */ + struct dma_resv *obj; + + /** @usage: Return fences with this usage or lower. */ + enum dma_resv_usage usage; + + /** @fence: the currently handled fence */ + struct dma_fence *fence; + + /** @fence_usage: the usage of the current fence */ + enum dma_resv_usage fence_usage; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences; private, *MUST* not dereference */ + struct dma_resv_list *fences; + + /** @num_fences: number of fences */ + unsigned int num_fences; + + /** @is_restarted: true if this is the first returned fence */ + bool is_restarted; +}; + +#else + +/** + * struct dma_resv_list - a list of shared fences + * @rcu: for internal use + * @shared_count: table of shared fences + * @shared_max: for growing shared fence table + * @shared: shared fence table + */ +struct dma_resv_list { + struct rcu_head rcu; + u32 shared_count, shared_max; + struct dma_fence __rcu *shared[]; +}; + +struct dma_resv_iter { + /** @obj: The dma_resv object we iterate over */ + struct dma_resv *obj; + + /** @usage: Return fences with this usage or lower. */ + enum dma_resv_usage usage; + + /** @fence: the currently handled fence */ + struct dma_fence *fence; + + /** @fence_usage: the usage of the current fence */ + enum dma_resv_usage fence_usage; + + /** @seq: sequence number to check for modifications */ + unsigned int seq; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences; private, *MUST* not dereference */ + struct dma_resv_list *fences; + + /** @shared_count: number of shared fences */ + unsigned int shared_count; + + /** @is_restarted: true if this is the first returned fence */ + bool is_restarted; + + /** @excl_fence: keep a reference to excl_fence when begin iterating kernel fences */ + struct dma_fence *excl_fence; + + /** @kernel_iter: next kernel fence pointer when iterating kernel fences */ + struct dma_fence *kernel_iter; +}; + +#if defined(HAVE_DMA_RESV_SEQCOUNT_WW_MUTEX_T) +struct dma_resv { + struct ww_mutex lock; + seqcount_ww_mutex_t seq; + + struct dma_fence __rcu *fence_excl; + struct dma_resv_list __rcu *fence; +}; +#else +struct dma_resv { + struct ww_mutex lock; + seqcount_t seq; + + struct dma_fence __rcu *fence_excl; + struct dma_resv_list __rcu *fence; +}; +#endif + +/** + * dma_resv_excl_fence - return the object's exclusive fence + * @obj: the reservation object + * + * Returns the exclusive fence (if any). 
Caller must either hold the objects + * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), + * or one of the variants of each + * + * RETURNS + * The exclusive fence or NULL + */ +static inline struct dma_fence * +dma_resv_excl_fence(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fence_excl, lockdep_is_held(&(obj)->lock.base)); +} + +/** + * dma_resv_shared_list - get the reservation object's shared fence list + * @obj: the reservation object + * + * Returns the shared fence list. Caller must either hold the objects + * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(), + * or one of the variants of each + */ +static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fence, lockdep_is_held(&(obj)->lock.base)); +} + +/** + * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * @cursor: the cursor of the current position + * + * Returns true if the currently returned fence is the exclusive one. + */ +static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{ + return cursor->index == 0; +} + +#endif /* !defined(HAVE_DMA_RESV_FENCES) */ + +#if !defined(smp_store_mb) +#define smp_store_mb set_mb +#endif +#endif diff --git a/include/kcl/kcl_dma_fence.h b/include/kcl/kcl_dma_fence.h new file mode 100644 index 0000000000000..a24278c214244 --- /dev/null +++ b/include/kcl/kcl_dma_fence.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Fence mechanism for dma-buf to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + */ + +#ifndef AMDKCL_DMA_FENCE_H +#define AMDKCL_DMA_FENCE_H + +#ifndef HAVE_DMA_FENCE_IS_CONTAINER +#include + +#if !defined(HAVE_LINUX_FENCE_ARRAY_H) +#include +#endif +/** + * dma_fence_is_chain - check if a fence is from the chain subclass + * @fence: the fence to test + * + * Return true if it is a dma_fence_chain and false otherwise. + */ +static inline bool dma_fence_is_chain(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_chain_ops; +} + +/** + * dma_fence_is_container - check if a fence is a container for other fences + * @fence: the fence to test + * + * Return true if this fence is a container for other fences, false otherwise. + * This is important since we can't build up large fence structure or otherwise + * we run into recursion during operation on those fences. + */ +static inline bool dma_fence_is_container(struct dma_fence *fence) +{ + return dma_fence_is_array(fence) || dma_fence_is_chain(fence); +} + +#endif /* HAVE_DMA_FENCE_IS_CONTAINER */ + +#ifndef HAVE_DMA_FENCE_TIMESTAMP +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. 
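+ *
+ * Example (illustrative only; "start" is a placeholder ktime_t taken when
+ * the job was submitted):
+ *
+ *   ktime_t runtime = ktime_sub(dma_fence_timestamp(fence), start);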
+ */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} +#endif + +/* Copied from include/linux/dma-fence.h */ +#ifndef HAVE_DMA_FENCE_IS_LATER_OR_SAME +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} +#endif /* HAVE_DMA_FENCE_IS_LATER_OR_SAME */ +#endif diff --git a/include/kcl/kcl_dma_fence_chain.h b/include/kcl/kcl_dma_fence_chain.h new file mode 100644 index 0000000000000..97900481479c5 --- /dev/null +++ b/include/kcl/kcl_dma_fence_chain.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * fence-chain: chain fences together in a timeline + * + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * Authors: + * Christian König + */ +#ifndef AMDKCL_DMA_FENCE_CHAIN_H +#define AMDKCL_DMA_FENCE_CHAIN_H + +#ifdef HAVE_LINUX_DMA_FENCE_CHAIN_H +#include +#endif + +#if !defined(HAVE_STRUCT_DMA_FENCE_CHAIN) +#include +#include +#include + +/** + * struct dma_fence_chain - fence to represent a node of a fence chain + * @base: fence base class + * @prev: previous fence of the chain + * @prev_seqno: original previous seqno before garbage collection + * @fence: encapsulated fence + * @lock: spinlock for fence handling + */ +struct dma_fence_chain { + struct dma_fence base; + struct dma_fence __rcu *prev; + u64 prev_seqno; + struct dma_fence *fence; + union { + /** + * @cb: callback for signaling + * + * This is used to add the callback for signaling the + * completion of the fence chain. Never used at the same time + * as the irq work. + */ + struct dma_fence_cb cb; + + /** + * @work: irq work item for signaling + * + * Irq work structure to allow us to add the callback without + * running into lock inversion. Never used at the same time as + * the callback. + */ + struct irq_work work; + }; + spinlock_t lock; +}; + +extern const struct dma_fence_ops dma_fence_chain_ops; + +/** + * to_dma_fence_chain - cast a fence to a dma_fence_chain + * @fence: fence to cast to a dma_fence_chain + * + * Returns NULL if the fence is not a dma_fence_chain, + * or the dma_fence_chain otherwise. + */ +static inline struct dma_fence_chain * +to_dma_fence_chain(struct dma_fence *fence) +{ + if (!fence || fence->ops != &dma_fence_chain_ops) + return NULL; + + return container_of(fence, struct dma_fence_chain, base); +} + +/** + * dma_fence_chain_for_each - iterate over all fences in chain + * @iter: current fence + * @head: starting point + * + * Iterate over all fences in the chain. We keep a reference to the current + * fence while inside the loop which must be dropped when breaking out. + */ +#define dma_fence_chain_for_each(iter, head) \ + for (iter = dma_fence_get(head); iter; \ + iter = dma_fence_chain_walk(iter)) + +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence); +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno); +void dma_fence_chain_init(struct dma_fence_chain *chain, + struct dma_fence *prev, + struct dma_fence *fence, + uint64_t seqno); + +#endif /* HAVE_STRUCT_DMA_FENCE_CHAIN */ + +#if !defined(HAVE_STRUCT_DMA_FENCE_CHAIN) || !defined(HAVE_DMA_FENCE_CHAIN_ALLOC) +/** + * dma_fence_chain_alloc + * + * Returns a new struct dma_fence_chain object or NULL on failure.
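+ *
+ * Example (illustrative only): preallocate the node outside of locks,
+ * then link it into a timeline:
+ *
+ *   struct dma_fence_chain *node = dma_fence_chain_alloc();
+ *
+ *   if (node)
+ *           dma_fence_chain_init(node, prev, fence, seqno);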
+ */ +static inline struct dma_fence_chain *dma_fence_chain_alloc(void) +{ + return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); +}; + +/** + * dma_fence_chain_free + * @chain: chain node to free + * + * Frees up an allocated but not used struct dma_fence_chain object. This + * doesn't need an RCU grace period since the fence was never initialized nor + * published. After dma_fence_chain_init() has been called the fence must be + * released by calling dma_fence_put(), and not through this function. + */ +static inline void dma_fence_chain_free(struct dma_fence_chain *chain) +{ + kfree(chain); +}; + +#endif + +#ifndef HAVE_DMA_FENCE_CHAIN_CONTAINED +/** + * dma_fence_chain_contained - return the contained fence + * @fence: the fence to test + * + * If the fence is a dma_fence_chain the function returns the fence contained + * inside the chain object, otherwise it returns the fence itself. + */ +static inline struct dma_fence * +dma_fence_chain_contained(struct dma_fence *fence) +{ + struct dma_fence_chain *chain = to_dma_fence_chain(fence); + + return chain ? chain->fence : fence; +} +#endif /* HAVE_DMA_FENCE_CHAIN_CONTAINED */ + +#endif diff --git a/include/kcl/kcl_dma_mapping.h b/include/kcl/kcl_dma_mapping.h new file mode 100644 index 0000000000000..c433caeca3d98 --- /dev/null +++ b/include/kcl/kcl_dma_mapping.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_DMA_MAPPING_H +#define AMDKCL_DMA_MAPPING_H + +#include +#include + +/* + * commit v4.8-11962-ga9a62c938441 + * dma-mapping: introduce the DMA_ATTR_NO_WARN attribute + */ +#ifndef DMA_ATTR_NO_WARN +#define DMA_ATTR_NO_WARN (0UL) +#endif + +/* +* commit v5.3-rc1-57-g06532750010e +* dma-mapping: use dma_get_mask in dma_addressing_limited + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0) +#define AMDKCL_DMA_ADDRESSING_LIMITED_WORKAROUND +#endif + +#ifdef HAVE_LINUX_DMA_ATTRS_H +static inline +void _kcl_convert_long_to_dma_attrs(struct dma_attrs *dma_attrs, + unsigned long attrs) +{ + int i; + + init_dma_attrs(dma_attrs); + + for (i = 0; i < DMA_ATTR_MAX; i++) { + if (attrs & (1 << i)) + dma_set_attr(i, dma_attrs); + } +} + +static inline +void *kcl_dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, unsigned long attrs) +{ + struct dma_attrs dma_attrs; + + _kcl_convert_long_to_dma_attrs(&dma_attrs, attrs); + return dma_alloc_attrs(dev, size, dma_handle, flag, &dma_attrs); +} + +static inline +void kcl_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct dma_attrs dma_attrs; + + _kcl_convert_long_to_dma_attrs(&dma_attrs, attrs); + dma_free_attrs(dev, size, cpu_addr, dma_handle, &dma_attrs); +} +#else +static inline void *kcl_dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + unsigned long attrs) +{ + return dma_alloc_attrs(dev, size, dma_handle, flag, attrs); +} +static inline void kcl_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + return dma_free_attrs(dev, size, cpu_addr, dma_handle, attrs); +} +#endif + +#ifndef HAVE_DMA_MAP_SGTABLE +#ifdef HAVE_LINUX_DMA_ATTRS_H +static inline +int _kcl_dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct dma_attrs dma_attrs; + + _kcl_convert_long_to_dma_attrs(&dma_attrs, attrs); + return dma_map_sg_attrs(dev, sg, nents, dir, &dma_attrs); +} + +static inline +void 
_kcl_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) + +{ + struct dma_attrs dma_attrs; + + _kcl_convert_long_to_dma_attrs(&dma_attrs, attrs); + dma_unmap_sg_attrs(dev, sg, nents, dir, &dma_attrs); +} + +#else +static inline +int _kcl_dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + return dma_map_sg_attrs(dev, sg, nents, dir, attrs); +} +static inline +void _kcl_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); +} +#endif /* HAVE_LINUX_DMA_ATTRS_H */ + +static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + int nents; + + nents = _kcl_dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); + if (nents <= 0) + return -EINVAL; + sgt->nents = nents; + return 0; +} + +static inline void dma_unmap_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + _kcl_dma_unmap_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); +} +#endif + +static inline bool kcl_has_dma_map_resource_ops(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + return ops == NULL || ops->map_resource != NULL; +} +/* + * v5.8-rc3-2-g68d237056e00 ("scatterlist: protect parameters of the sg_table related macros") + * v5.7-rc5-33-g709d6d73c756 ("scatterlist: add generic wrappers for iterating over sgtable objects") + * Copied from include/linux/scatterlist.h + */ +#ifndef for_each_sgtable_sg +#define for_each_sgtable_sg(sgt, sg, i) \ + for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) +#endif + +#endif diff --git a/include/kcl/kcl_drm_aperture.h b/include/kcl/kcl_drm_aperture.h new file mode 100644 index 0000000000000..d4ca18d5c1792 --- /dev/null +++ b/include/kcl/kcl_drm_aperture.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef KCL_KCL_DRM_APERTURE_H +#define KCL_KCL_DRM_APERTURE_H + +#ifndef HAVE_DRM_DRM_APERTURE_H + +#include + +/* Copied from drm/drm_aperture.h */ +struct drm_device; +struct pci_dev; + +int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); + +#endif /* HAVE_DRM_DRM_APERTURE_H */ + +#endif diff --git a/include/kcl/kcl_drm_atomic_helper.h b/include/kcl/kcl_drm_atomic_helper.h new file mode 100644 index 0000000000000..3af6d075cbe99 --- /dev/null +++ b/include/kcl/kcl_drm_atomic_helper.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Red Hat + * Copyright (C) 2014 Intel Corp. + * Copyright (C) 2018 Intel Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark + * Daniel Vetter + */ +#ifndef AMDKCL_DRM_ATOMIC_HELPER_H +#define AMDKCL_DRM_ATOMIC_HELPER_H + +#include +#include +#include +#include +#include + +/* drm/atomic-helper: Remove _HELPER_ infix from DRM_PLANE_HELPER_NO_SCALING */ +#ifndef DRM_PLANE_NO_SCALING +#define DRM_PLANE_NO_SCALING (1<<16) +#endif + +/* + * v4.19-rc1-206-ge267364a6e1b + * drm/atomic: Initialise planes with opaque alpha values + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 20, 0) +#define AMDKCL__DRM_ATOMIC_HELPER_PLANE_RESET +void _kcl__drm_atomic_helper_plane_reset(struct drm_plane *plane, + struct drm_plane_state *state); +#endif + +#ifndef HAVE___DRM_ATOMIC_HELPER_CRTC_RESET +void __drm_atomic_helper_crtc_reset(struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state); +#endif + +#ifndef HAVE_DRM_ATOMIC_HELPER_CALC_TIMESTAMPING_CONSTANTS +void drm_atomic_helper_calc_timestamping_constants(struct drm_atomic_state *state); +#endif + +#ifndef HAVE_DRM_ATOMIC_PLANE_ENABLING +static inline bool drm_atomic_plane_enabling(struct drm_plane_state *old_plane_state, + struct drm_plane_state *new_plane_state) +{ + /* + * When enabling a plane, CRTC and FB should always be set together. + * Anything else should be considered a bug in the atomic core, so we + * gently warn about it. + */ + WARN_ON((!new_plane_state->crtc && new_plane_state->fb) || + (new_plane_state->crtc && !new_plane_state->fb)); + + return !old_plane_state->crtc && new_plane_state->crtc; +} +#endif + +#endif diff --git a/include/kcl/kcl_drm_cache.h b/include/kcl/kcl_drm_cache.h new file mode 100644 index 0000000000000..8350e1faa62b5 --- /dev/null +++ b/include/kcl/kcl_drm_cache.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2009 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * + **************************************************************************/ +/* + * Authors: + * Dave Airlie + */ +#ifndef AMDKCL_DRM_CACHE_H +#define AMDKCL_DRM_CACHE_H +#include +#include + +#if !defined(HAVE_DRM_NEED_SWIOTLB) +bool drm_need_swiotlb(int dma_bits); +#endif /* HAVE_DRM_NEED_SWIOTLB */ + +/* + * Copied from include/drm/drm_cache.h + * v5.4-rc2-80-g268a2d600130 MIPS: Loongson64: Rename CPU TYPES + */ +static inline bool kcl_drm_arch_can_wc_memory(void) +{ +#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) + return false; +#elif defined(CONFIG_MIPS) && \ + (defined(CONFIG_CPU_LOONGSON64) || defined(CPU_LOONGSON3)) + + return false; +#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) + /* + * The DRM driver stack is designed to work with cache coherent devices + * only, but permits an optimization to be enabled in some cases, where + * for some buffers, both the CPU and the GPU use uncached mappings, + * removing the need for DMA snooping and allocation in the CPU caches. + * + * The use of uncached GPU mappings relies on the correct implementation + * of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU + * will use cached mappings nonetheless. On x86 platforms, this does not + * seem to matter, as uncached CPU mappings will snoop the caches in any + * case. However, on ARM and arm64, enabling this optimization on a + * platform where NoSnoop is ignored results in loss of coherency, which + * breaks correct operation of the device. Since we have no way of + * detecting whether NoSnoop works or not, just disable this + * optimization entirely for ARM and arm64. + */ + return false; +#else + return true; +#endif +} + +#endif /* AMDKCL_DRM_CACHE_H */ diff --git a/include/kcl/kcl_drm_client.h b/include/kcl/kcl_drm_client.h new file mode 100644 index 0000000000000..0857e2fc2cd65 --- /dev/null +++ b/include/kcl/kcl_drm_client.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef KCL_KCL_DRM_CLIENT_H +#define KCL_KCL_DRM_CLIENT_H + +#include + +#ifndef HAVE_DRM_CLIENT_REGISTER +static inline void drm_client_register(struct drm_client_dev *client) +{ + drm_client_add(client); +} +#endif /* HAVE_DRM_CLIENT_REGISTER */ + +#endif diff --git a/include/kcl/kcl_drm_connector.h b/include/kcl/kcl_drm_connector.h new file mode 100644 index 0000000000000..c0a969519be7f --- /dev/null +++ b/include/kcl/kcl_drm_connector.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2016 Intel Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. 
+ * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ +#ifndef AMDKCL_DRM_CONNECTOR_H +#define AMDKCL_DRM_CONNECTOR_H + +#include +#include +#include +#include + +/** + * drm_connector_for_each_possible_encoder - iterate connector's possible encoders + * @connector: &struct drm_connector pointer + * @encoder: &struct drm_encoder pointer used as cursor + * @__i: int iteration cursor, for macro-internal use + */ +#ifndef drm_connector_for_each_possible_encoder +#define drm_connector_for_each_possible_encoder(connector, encoder, __i) \ + for ((__i) = 0; (__i) < ARRAY_SIZE((connector)->encoder_ids) && \ + (connector)->encoder_ids[(__i)] != 0; (__i)++) \ + for_each_if((encoder) = \ + drm_encoder_find((connector)->dev, NULL, \ + (connector)->encoder_ids[(__i)])) \ + +#endif + +#ifndef HAVE_DRM_CONNECTOR_INIT_WITH_DDC +int _kcl_drm_connector_init_with_ddc(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc); +static inline +int drm_connector_init_with_ddc(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) +{ + return _kcl_drm_connector_init_with_ddc(dev, connector, funcs, connector_type, ddc); +} +#endif + +#ifndef DP_MAX_DOWNSTREAM_PORTS +#define DP_MAX_DOWNSTREAM_PORTS 0x10 +#endif + +#ifndef HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY +void drm_connector_attach_dp_subconnector_property(struct drm_connector *connector); +void drm_dp_set_subconnector_property(struct drm_connector *connector, enum drm_connector_status status, + const u8 *dpcd, const u8 prot_cap[4]); + +#define DRM_MODE_SUBCONNECTOR_VGA 1 +#define DRM_MODE_SUBCONNECTOR_DisplayPort 10 +#define DRM_MODE_SUBCONNECTOR_HDMIA 11 +#define DRM_MODE_SUBCONNECTOR_Native 15 +#define DRM_MODE_SUBCONNECTOR_Wireless 18 +#endif /* HAVE_DRM_MODE_CONFIG_DP_SUBCONNECTOR_PROPERTY */ + +#ifndef HAVE_DRM_CONNECTOR_ATOMIC_HDR_METADATA_EQUAL +bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, + struct drm_connector_state *new_state); +#endif + +#if !defined(HAVE_DRM_CONNECTOR_ATTACH_HDR_OUTPUT_METADATA_PROPERTY) +int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector); +#endif + +#ifndef HAVE_DRM_CONNECTOR_SET_PANEL_ORIENTATION_WITH_QUIRK +int _kcl_drm_connector_set_panel_orientation_with_quirk( + struct drm_connector *connector, + enum drm_panel_orientation panel_orientation, + int width, int height); + +static inline +int drm_connector_set_panel_orientation_with_quirk( + struct drm_connector *connector, + enum drm_panel_orientation panel_orientation, + int width, int height) +{ + return _kcl_drm_connector_set_panel_orientation_with_quirk(connector, panel_orientation, width, height); +} +#endif + +#ifndef HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY +int _kcl_drm_connector_attach_colorspace_property(struct drm_connector *connector); +#define drm_connector_attach_colorspace_property _kcl_drm_connector_attach_colorspace_property +#endif /* 
HAVE_DRM_CONNECT_ATTACH_COLORSPACE_PROPERTY */ + +#ifndef HAVE_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY_2ARGS +#define KCL_DRM_MODE_CREATE_COLORSPACE_PROPERTY +int _kcl_drm_mode_create_hdmi_colorspace_property(struct drm_connector *connector, + u32 supported_colorspaces); +#define drm_mode_create_hdmi_colorspace_property _kcl_drm_mode_create_hdmi_colorspace_property +#endif /* HAVE_DRM_MODE_CREATE_HDMI_COLORSPACE_PROPERTY_2ARGS */ + +#ifndef HAVE_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_2ARGS +#define KCL_DRM_MODE_CREATE_COLORSPACE_PROPERTY +int _kcl_drm_mode_create_dp_colorspace_property(struct drm_connector *connector, + u32 supported_colorspaces); +#define drm_mode_create_dp_colorspace_property _kcl_drm_mode_create_dp_colorspace_property +#endif /* HAVE_DRM_MODE_CREATE_DP_COLORSPACE_PROPERTY_2ARGS */ + +#ifdef KCL_DRM_MODE_CREATE_COLORSPACE_PROPERTY +#define DRM_MODE_COLORIMETRY_COUNT 16 +#endif + +#ifndef DRM_COLOR_FORMAT_YCBCR444 +#define DRM_COLOR_FORMAT_YCBCR444 (1<<1) +#endif + +#ifndef DRM_COLOR_FORMAT_YCBCR422 +#define DRM_COLOR_FORMAT_YCBCR422 (1<<2) +#endif + +#ifndef DRM_COLOR_FORMAT_YCBCR420 +#define DRM_COLOR_FORMAT_YCBCR420 (1<<3) +#endif + +/* For Default case, driver will set the colorspace */ +#ifndef DRM_MODE_COLORIMETRY_DEFAULT +/* For Default case, driver will set the colorspace */ +#define DRM_MODE_COLORIMETRY_DEFAULT 0 +/* CEA 861 Normal Colorimetry options */ +#define DRM_MODE_COLORIMETRY_NO_DATA 0 +#define DRM_MODE_COLORIMETRY_SMPTE_170M_YCC 1 +#define DRM_MODE_COLORIMETRY_BT709_YCC 2 +/* CEA 861 Extended Colorimetry Options */ +#define DRM_MODE_COLORIMETRY_XVYCC_601 3 +#define DRM_MODE_COLORIMETRY_XVYCC_709 4 +#define DRM_MODE_COLORIMETRY_SYCC_601 5 +#define DRM_MODE_COLORIMETRY_OPYCC_601 6 +#define DRM_MODE_COLORIMETRY_OPRGB 7 +#define DRM_MODE_COLORIMETRY_BT2020_CYCC 8 +#define DRM_MODE_COLORIMETRY_BT2020_RGB 9 +#define DRM_MODE_COLORIMETRY_BT2020_YCC 10 +/* Additional Colorimetry extension added as part of CTA 861.G */ +#define DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65 11 +#define DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER 12 +#endif /* DRM_MODE_COLORIMETRY_DEFAULT */ + +/* v5.3-rc1-676-g45cf0e91df8c */ +#ifndef DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED +/* Additional Colorimetry Options added for DP 1.4a VSC Colorimetry Format */ +#define DRM_MODE_COLORIMETRY_RGB_WIDE_FIXED 13 +#define DRM_MODE_COLORIMETRY_RGB_WIDE_FLOAT 14 +#define DRM_MODE_COLORIMETRY_BT601_YCC 15 +#endif + +#endif /* AMDKCL_DRM_CONNECTOR_H */ diff --git a/include/kcl/kcl_drm_crtc.h b/include/kcl/kcl_drm_crtc.h new file mode 100644 index 0000000000000..3911fa0faaa04 --- /dev/null +++ b/include/kcl/kcl_drm_crtc.h @@ -0,0 +1,108 @@ +/* + * Copyright © 2006 Keith Packard + * Copyright © 2007-2008 Dave Airlie + * Copyright © 2007-2008 Intel Corporation + * Jesse Barnes + * For codes copied from include/drm/drm_crtc.h + * + * Copyright © 2006 Keith Packard + * Copyright © 2007-2008 Dave Airlie + * Copyright © 2007-2008 Intel Corporation + * Jesse Barnes + * For codes copied from include/drm/drm_crtc_helper.h + * + * Copyright (c) 2007 Dave Airlie + * Copyright (c) 2007 Jakob Bornecrantz + * Copyright (c) 2008 Red Hat Inc. 
+ * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * Copyright (c) 2007-2008 Intel Corporation + * For codes copied from include/drm/drm_mode.h + * + * Copyright 2018 Intel Corporation + * For codes copied from include/drm/drm_util.h + * + * Copyright (c) 2016 Intel Corporation + * For codes copied from include/drm/drm_encoder.h + * + * Copyright (c) 2016 Intel Corporation + * For codes copied from include/drm/drm_framebuffer.h + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef KCL_KCL_DRM_CRTC_H +#define KCL_KCL_DRM_CRTC_H + +#include +#include +#include + +/* Copied from include/drm/drm_mode.h */ +#ifndef DRM_MODE_ROTATE_0 +#define DRM_MODE_ROTATE_0 (1<<0) +#endif +#ifndef DRM_MODE_ROTATE_90 +#define DRM_MODE_ROTATE_90 (1<<1) +#endif +#ifndef DRM_MODE_ROTATE_180 +#define DRM_MODE_ROTATE_180 (1<<2) +#endif +#ifndef DRM_MODE_ROTATE_270 +#define DRM_MODE_ROTATE_270 (1<<3) +#endif + +#ifndef DRM_MODE_ROTATE_MASK +#define DRM_MODE_ROTATE_MASK (\ + DRM_MODE_ROTATE_0 | \ + DRM_MODE_ROTATE_90 | \ + DRM_MODE_ROTATE_180 | \ + DRM_MODE_ROTATE_270) +#endif + +/* Copied from include/drm/drm_util.h */ +/* helper for handling conditionals in various for_each macros */ +#ifndef for_each_if +#define for_each_if(condition) if (!(condition)) {} else +#endif + +#ifndef drm_for_each_crtc +#define drm_for_each_crtc(crtc, dev) \ + list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) +#endif + +#ifndef drm_for_each_encoder +#define drm_for_each_encoder(encoder, dev) \ + list_for_each_entry(encoder, &(dev)->mode_config.encoder_list, head) +#endif + +#ifndef drm_for_each_fb +#define drm_for_each_fb(fb, dev) \ + list_for_each_entry(fb, &(dev)->mode_config.fb_list, head) +#endif + +#if !defined(HAVE_DRM_HELPER_FORCE_DISABLE_ALL) +int _kcl_drm_helper_force_disable_all(struct drm_device *dev); +static inline +int drm_helper_force_disable_all(struct drm_device *dev) +{ + return _kcl_drm_helper_force_disable_all(dev); +} +#endif + + +#endif diff --git a/include/kcl/kcl_drm_dp.h b/include/kcl/kcl_drm_dp.h new file mode 100644 index 0000000000000..7f3607f89c6a4 --- /dev/null +++ b/include/kcl/kcl_drm_dp.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in 
supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ +#ifndef _KCL_DRM_DP_H +#define _KCL_DRM_DP_H + +#include + +#ifndef DP_SINK_VIDEO_FALLBACK_FORMATS +#define DP_SINK_VIDEO_FALLBACK_FORMATS 0x020 +#endif +#ifndef DP_FEC_CAPABILITY_1 +#define DP_FEC_CAPABILITY_1 0x091 +#endif + +#ifndef DP_DSC_CONFIGURATION +#define DP_DSC_CONFIGURATION 0x161 +#endif +#ifndef DP_PHY_SQUARE_PATTERN +#define DP_PHY_SQUARE_PATTERN 0x249 +#endif + +#ifndef DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0 +#define DP_DSC_MAX_SLICE_COUNT_AND_AGGREGATION_0 0x2270 +#endif +#ifndef DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK +#define DP_DSC_DECODER_0_MAXIMUM_SLICE_COUNT_MASK (1 << 0) +#endif +#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK +#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_MASK (0b111 << 1) +#endif +#ifndef DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT +#define DP_DSC_DECODER_0_AGGREGATION_SUPPORT_SHIFT 1 +#endif +#ifndef DP_DSC_DECODER_COUNT_MASK +#define DP_DSC_DECODER_COUNT_MASK (0b111 << 5) +#endif +#ifndef DP_DSC_DECODER_COUNT_SHIFT +#define DP_DSC_DECODER_COUNT_SHIFT 5 +#endif +#ifndef DP_MAIN_LINK_CHANNEL_CODING_SET +#define DP_MAIN_LINK_CHANNEL_CODING_SET 0x108 +#endif +#ifndef DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER +#define DP_MAIN_LINK_CHANNEL_CODING_PHY_REPEATER 0xF0006 +#endif +#ifndef DP_INTRA_HOP_AUX_REPLY_INDICATION +#define DP_INTRA_HOP_AUX_REPLY_INDICATION (1 << 3) +#endif + +#ifndef DP_DFP_CAPABILITY_EXTENSION_SUPPORT +#define DP_DFP_CAPABILITY_EXTENSION_SUPPORT 0x0A3 +#endif +#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_7_0 +#define DP_TEST_264BIT_CUSTOM_PATTERN_7_0 0X2230 +#endif +#ifndef DP_TEST_264BIT_CUSTOM_PATTERN_263_256 +#define DP_TEST_264BIT_CUSTOM_PATTERN_263_256 0X2250 +#endif + +/* v5.9-rc5-1031-g7d56927efac7 * + * drm/dp: add a number of DP 2.0 DPCD definitions */ +#ifndef DP_LINK_BW_10 +#define DP_LINK_BW_10 0x01 /* 2.0 128b/132b Link Layer */ +#define DP_LINK_BW_13_5 0x04 /* 2.0 128b/132b Link Layer */ +#define DP_LINK_BW_20 0x02 /* 2.0 128b/132b Link Layer */ +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_drm_dp_cec.h b/include/kcl/kcl_drm_dp_cec.h new file mode 100644 index 0000000000000..a50c290cc7248 --- /dev/null +++ b/include/kcl/kcl_drm_dp_cec.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DisplayPort CEC-Tunneling-over-AUX support + * + * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ */ + +#ifndef __KCL_KCL_DRM_DP_CEC_H__ +#define __KCL_KCL_DRM_DP_CEC_H__ + +#include + +/* + * commit v4.19-rc1-100-g5ce70c799ac2 + * drm_dp_cec: check that aux has a transfer function + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 20, 0) +#define AMDKCL_DRM_DP_CEC_XXX_CHECK_CB +#endif + +/* Copied from gpu/drm/drm_dp_cec.c and modified for KCL */ +#if defined(AMDKCL_DRM_DP_CEC_XXX_CHECK_CB) +static inline void _kcl_drm_dp_cec_irq(struct drm_dp_aux *aux) +{ +#ifdef CONFIG_DRM_DP_CEC + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; +#endif + + drm_dp_cec_irq(aux); +} + +static inline void _kcl_drm_dp_cec_set_edid(struct drm_dp_aux *aux, + const struct edid *edid) +{ +#ifdef CONFIG_DRM_DP_CEC + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; +#endif + + drm_dp_cec_set_edid(aux, edid); +} + +static inline void _kcl_drm_dp_cec_unset_edid(struct drm_dp_aux *aux) +{ +#ifdef CONFIG_DRM_DP_CEC + /* No transfer function was set, so not a DP connector */ + if (!aux->transfer) + return; +#endif + + drm_dp_cec_unset_edid(aux); +} +#endif + +#if !defined(HAVE_DRM_DP_CEC_REGISTER_CONNECTOR_PP) +static inline void _kcl_drm_dp_cec_register_connector(struct drm_dp_aux *aux, + struct drm_connector *connector) +{ +#ifdef CONFIG_DRM_DP_CEC + if (WARN_ON(!aux->transfer)) + return; +#endif + + drm_dp_cec_register_connector(aux, connector->name, connector->dev->dev); +} +#endif + + +#endif diff --git a/include/kcl/kcl_drm_dp_helper.h b/include/kcl/kcl_drm_dp_helper.h new file mode 100644 index 0000000000000..532d8160eba9d --- /dev/null +++ b/include/kcl/kcl_drm_dp_helper.h @@ -0,0 +1,367 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + + +#ifndef _KCL_DRM_DP_HELPER_H_ +#define _KCL_DRM_DP_HELPER_H_ + +#include +#include +#include + +#include +#include +#include +#include + +/* + * v4.13-rc5-840-gc673fe7f0cd5 + * drm/dp: DPCD register defines for link status within ESI field + */ +#ifndef DP_LANE0_1_STATUS_ESI +#define DP_LANE0_1_STATUS_ESI 0x200c /* status same as 0x202 */ +#define DP_LANE2_3_STATUS_ESI 0x200d /* status same as 0x203 */ +#define DP_LANE_ALIGN_STATUS_UPDATED_ESI 0x200e /* status same as 0x204 */ +#define DP_SINK_STATUS_ESI 0x200f /* status same as 0x205 */ +#endif + +/* + * v4.13-rc5-1383-gac58fff15516 + * drm/dp-helper: add missing defines needed by AMD display core. 
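+ *
+ * These are raw DPCD register addresses from the DisplayPort specification;
+ * each block below is guarded so the values are only supplied when the
+ * kernel's own DP helper headers do not already provide them.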
+ */ +#ifndef DP_ADJUST_REQUEST_POST_CURSOR2 +#define DP_ADJUST_REQUEST_POST_CURSOR2 0x20c + +#define DP_TEST_MISC0 0x232 + +#define DP_TEST_PHY_PATTERN 0x248 +#define DP_TEST_80BIT_CUSTOM_PATTERN_7_0 0x250 +#define DP_TEST_80BIT_CUSTOM_PATTERN_15_8 0x251 +#define DP_TEST_80BIT_CUSTOM_PATTERN_23_16 0x252 +#define DP_TEST_80BIT_CUSTOM_PATTERN_31_24 0x253 +#define DP_TEST_80BIT_CUSTOM_PATTERN_39_32 0x254 +#define DP_TEST_80BIT_CUSTOM_PATTERN_47_40 0x255 +#define DP_TEST_80BIT_CUSTOM_PATTERN_55_48 0x256 +#define DP_TEST_80BIT_CUSTOM_PATTERN_63_56 0x257 +#define DP_TEST_80BIT_CUSTOM_PATTERN_71_64 0x258 +#define DP_TEST_80BIT_CUSTOM_PATTERN_79_72 0x259 + +#define DP_BRANCH_REVISION_START 0x509 + +#define DP_DP13_DPCD_REV 0x2200 +#define DP_DP13_MAX_LINK_RATE 0x2201 +#endif + + +#if !defined(DP_DPRX_FEATURE_ENUMERATION_LIST) +#define DP_DPRX_FEATURE_ENUMERATION_LIST 0x2210 /* DP 1.3 */ +#endif + +#if !defined(DP_TRAINING_PATTERN_SET_PHY_REPEATER1) +#define DP_TRAINING_PATTERN_SET_PHY_REPEATER1 0xf0010 /* 1.3 */ +#endif + +#if !defined(DP_LANE0_1_STATUS_PHY_REPEATER1) +#define DP_LANE0_1_STATUS_PHY_REPEATER1 0xf0030 /* 1.3 */ +#endif + +#if !defined(DP_ADJUST_REQUEST_LANE0_1_PHY_REPEATER1) +#define DP_ADJUST_REQUEST_LANE0_1_PHY_REPEATER1 0xf0033 /* 1.3 */ +#endif + +#if !defined(DP_TRAINING_LANE0_SET_PHY_REPEATER1) +#define DP_TRAINING_LANE0_SET_PHY_REPEATER1 0xf0011 /* 1.3 */ +#endif + +#if !defined(DP_PHY_REPEATER_MODE_TRANSPARENT) +#define DP_PHY_REPEATER_MODE_TRANSPARENT 0x55 /* 1.3 */ +#endif + +#if !defined(DP_PHY_REPEATER_MODE) +#define DP_PHY_REPEATER_MODE 0xf0003 /* 1.3 */ +#endif + +#if !defined(DP_PHY_REPEATER_MODE_NON_TRANSPARENT) +#define DP_PHY_REPEATER_MODE_NON_TRANSPARENT 0xaa /* 1.3 */ +#endif + +#if !defined(DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1) +#define DP_TRAINING_AUX_RD_INTERVAL_PHY_REPEATER1 0xf0020 /* 1.4a */ +#endif + +#if !defined(DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT) +#define DP_PHY_REPEATER_EXTENDED_WAIT_TIMEOUT 0xf0005 /* 1.4a */ +#endif + +#if !defined(DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV) +#define DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV 0xf0000 /* 1.3 */ +#endif + +#if !defined(DP_MAX_LINK_RATE_PHY_REPEATER) +#define DP_MAX_LINK_RATE_PHY_REPEATER 0xf0001 /* 1.4a */ +#endif + +#if !defined(DP_PHY_REPEATER_CNT) +#define DP_PHY_REPEATER_CNT 0xf0002 /* 1.3 */ +#endif + +#if !defined(DP_MAX_LANE_COUNT_PHY_REPEATER) +#define DP_MAX_LANE_COUNT_PHY_REPEATER 0xf0004 /* 1.4a */ +#endif + +#if !defined(DP_TEST_AUDIO_MODE) +#define DP_TEST_AUDIO_MODE 0x271 +#endif + +#if !defined(DP_TEST_AUDIO_PATTERN_TYPE) +#define DP_TEST_AUDIO_PATTERN_TYPE 0x272 +#endif + +#if !defined(DP_TEST_AUDIO_PERIOD_CH1) +#define DP_TEST_AUDIO_PERIOD_CH1 0x273 +#endif + +#if !defined(DP_DSC_SUPPORT) +#define DP_DSC_SUPPORT 0x060 /* DP 1.4 */ +#endif + +/* + * v5.6-1624-g8811d9eb4dfa + * drm/amd/display: Align macro name as per DP spec + */ +#ifdef DP_TEST_PHY_PATTERN +#define DP_PHY_TEST_PATTERN DP_TEST_PHY_PATTERN +#endif + +/* commit fc1424c2ec813080aa1eaa2948070902b1a0e507 + * drm: Correct DP DSC macro typo */ +#ifdef DP_DSC_THROUGHPUT_MODE_0_UPSUPPORTED +#define DP_DSC_THROUGHPUT_MODE_0_UNSUPPORTED DP_DSC_THROUGHPUT_MODE_0_UPSUPPORTED +#endif + +#ifndef DP_DSC_THROUGHPUT_MODE_0_UNSUPPORTED +# define DP_DSC_THROUGHPUT_MODE_0_UNSUPPORTED 0 +# define DP_DSC_THROUGHPUT_MODE_0_170 (15 << 0) /* 1.4a */ +#endif + +#ifndef DP_UHBR10 +# define DP_UHBR10 (1 << 0) +# define DP_UHBR20 (1 << 1) +# define DP_UHBR13_5 (1 << 2) +#endif + +#ifndef DP_PHY_REPEATER_128B132B_RATES +/* 
See DP_128B132B_SUPPORTED_LINK_RATES for values */ +#define DP_PHY_REPEATER_128B132B_RATES 0xf0007 /* 2.0 */ +#endif + +/* v5.9-rc4-979-g9782f52ab5d6 + * drm/dp: Add LTTPR helpers + */ +#ifndef DP_TRAINING_PATTERN_SET_PHY_REPEATER + +enum drm_dp_phy { + DP_PHY_DPRX, + + DP_PHY_LTTPR1, + DP_PHY_LTTPR2, + DP_PHY_LTTPR3, + DP_PHY_LTTPR4, + DP_PHY_LTTPR5, + DP_PHY_LTTPR6, + DP_PHY_LTTPR7, + DP_PHY_LTTPR8, + + DP_MAX_LTTPR_COUNT = DP_PHY_LTTPR8, +}; + +#define DP_PHY_LTTPR(i) (DP_PHY_LTTPR1 + (i)) +#define __DP_LTTPR1_BASE 0xf0010 /* 1.3 */ +#define __DP_LTTPR2_BASE 0xf0060 /* 1.3 */ +#define DP_LTTPR_BASE(dp_phy) \ + (__DP_LTTPR1_BASE + (__DP_LTTPR2_BASE - __DP_LTTPR1_BASE) * \ + ((dp_phy) - DP_PHY_LTTPR1)) +#define DP_LTTPR_REG(dp_phy, lttpr1_reg) \ + (DP_LTTPR_BASE(dp_phy) - DP_LTTPR_BASE(DP_PHY_LTTPR1) + (lttpr1_reg)) +#define DP_TRAINING_PATTERN_SET_PHY_REPEATER(dp_phy) \ + DP_LTTPR_REG(dp_phy, DP_TRAINING_PATTERN_SET_PHY_REPEATER1) +#endif + +#ifndef DP_FEC_STATUS_PHY_REPEATER + +#define __DP_FEC1_BASE 0xf0290 /* 1.4 */ +#define __DP_FEC2_BASE 0xf0298 /* 1.4 */ +#define DP_FEC_BASE(dp_phy) \ + (__DP_FEC1_BASE + ((__DP_FEC2_BASE - __DP_FEC1_BASE) * \ + ((dp_phy) - DP_PHY_LTTPR1))) +#define DP_FEC_REG(dp_phy, fec1_reg) \ + (DP_FEC_BASE(dp_phy) - DP_FEC_BASE(DP_PHY_LTTPR1) + fec1_reg) +#define DP_FEC_STATUS_PHY_REPEATER1 0xf0290 /* 1.4 */ +#define DP_FEC_STATUS_PHY_REPEATER(dp_phy) \ + DP_FEC_REG(dp_phy, DP_FEC_STATUS_PHY_REPEATER1) +#define DP_LTTPR_MAX_ADD 0xf02ff /* 1.4 */ +#define DP_DPCD_MAX_ADD 0xfffff /* 1.4 */ + +#endif + +/* + * v5.10-rc2-482-gce32a6239de6 + * drm/dp_helper: Add Helpers for FRL Link Training support for DP-HDMI2.1 PCON + */ +#ifndef DP_PCON_HDMI_POST_FRL_STATUS + +/* PCON CONFIGURE-1 FRL FOR HDMI SINK */ +#define DP_PCON_HDMI_LINK_CONFIG_1 0x305A +# define DP_PCON_ENABLE_MAX_FRL_BW (7 << 0) +# define DP_PCON_ENABLE_MAX_BW_0GBPS 0 +# define DP_PCON_ENABLE_MAX_BW_9GBPS 1 +# define DP_PCON_ENABLE_MAX_BW_18GBPS 2 +# define DP_PCON_ENABLE_MAX_BW_24GBPS 3 +# define DP_PCON_ENABLE_MAX_BW_32GBPS 4 +# define DP_PCON_ENABLE_MAX_BW_40GBPS 5 +# define DP_PCON_ENABLE_MAX_BW_48GBPS 6 +# define DP_PCON_ENABLE_SOURCE_CTL_MODE (1 << 3) +# define DP_PCON_ENABLE_CONCURRENT_LINK (1 << 4) +# define DP_PCON_ENABLE_SEQUENTIAL_LINK (0 << 4) +# define DP_PCON_ENABLE_LINK_FRL_MODE (1 << 5) +# define DP_PCON_ENABLE_HPD_READY (1 << 6) +# define DP_PCON_ENABLE_HDMI_LINK (1 << 7) + +/* PCON CONFIGURE-2 FRL FOR HDMI SINK */ +#define DP_PCON_HDMI_LINK_CONFIG_2 0x305B +# define DP_PCON_MAX_LINK_BW_MASK (0x3F << 0) +# define DP_PCON_FRL_BW_MASK_9GBPS (1 << 0) +# define DP_PCON_FRL_BW_MASK_18GBPS (1 << 1) +# define DP_PCON_FRL_BW_MASK_24GBPS (1 << 2) +# define DP_PCON_FRL_BW_MASK_32GBPS (1 << 3) +# define DP_PCON_FRL_BW_MASK_40GBPS (1 << 4) +# define DP_PCON_FRL_BW_MASK_48GBPS (1 << 5) +# define DP_PCON_FRL_LINK_TRAIN_EXTENDED (1 << 6) +# define DP_PCON_FRL_LINK_TRAIN_NORMAL (0 << 6) + +/* PCON HDMI LINK STATUS */ +#define DP_PCON_HDMI_TX_LINK_STATUS 0x303B +# define DP_PCON_HDMI_TX_LINK_ACTIVE (1 << 0) +# define DP_PCON_FRL_READY (1 << 1) + +/* PCON HDMI POST FRL STATUS */ +#define DP_PCON_HDMI_POST_FRL_STATUS 0x3036 +# define DP_PCON_HDMI_LINK_MODE (1 << 0) +# define DP_PCON_HDMI_MODE_TMDS 0 +# define DP_PCON_HDMI_MODE_FRL 1 +# define DP_PCON_HDMI_FRL_TRAINED_BW (0x3F << 1) +# define DP_PCON_FRL_TRAINED_BW_9GBPS (1 << 1) +# define DP_PCON_FRL_TRAINED_BW_18GBPS (1 << 2) +# define DP_PCON_FRL_TRAINED_BW_24GBPS (1 << 3) +# define DP_PCON_FRL_TRAINED_BW_32GBPS (1 << 4) +# define 
DP_PCON_FRL_TRAINED_BW_40GBPS (1 << 5) +# define DP_PCON_FRL_TRAINED_BW_48GBPS (1 << 6) +#endif +/* + * v4.16-rc7-1860-g0597017cd18d + * drm/dp: Add DP_DPCD_REV_XX to drm_dp_helper + */ + +/* DPCD Field Address Mapping */ + +/* Receiver Capability */ +#ifndef DP_DPCD_REV_14 +# define DP_DPCD_REV_10 0x10 +# define DP_DPCD_REV_11 0x11 +# define DP_DPCD_REV_12 0x12 +# define DP_DPCD_REV_13 0x13 +# define DP_DPCD_REV_14 0x14 +#endif + +/* + * v4.20-rc3-897-g71b15621f097 + * drm: Add the PSR SU granularity registers offsets + */ +#ifndef DP_PSR2_SU_X_GRANULARITY +#define DP_PSR2_SU_X_GRANULARITY 0x072 /* eDP 1.4b */ +#endif +#ifndef DP_PSR2_SU_Y_GRANULARITY +#define DP_PSR2_SU_Y_GRANULARITY 0x074 /* eDP 1.4b */ +#endif + +/* + * drm: Add PSR version 3 macro + */ +#ifndef DP_PSR2_WITH_Y_COORD_IS_SUPPORTED +# define DP_PSR2_WITH_Y_COORD_IS_SUPPORTED 3 /* eDP 1.4a */ +#endif + +/* + * drm: add PSR2 support and capability definition as per eDP 1.5 + */ +#ifndef DP_PSR2_WITH_Y_COORD_ET_SUPPORTED +# define DP_PSR2_WITH_Y_COORD_ET_SUPPORTED 4 /* eDP 1.5, adopted eDP 1.4b SCR */ +#endif + +/* + * v4.10-rc3-483-gd0ce90629120 + * drm : adds Y-coordinate and Colorimetry Format + */ +#ifndef DP_PSR2_SU_Y_COORDINATE_REQUIRED +# define DP_PSR2_SU_Y_COORDINATE_REQUIRED (1 << 4) /* eDP 1.4a */ +# define DP_PSR2_SU_GRANULARITY_REQUIRED (1 << 5) /* eDP 1.4b */ +#endif + +/* + * v5.15-rc1-244-gba3078dad140 + * drm/dp: add helpers to read link training delays + */ +#ifndef DP_128B132B_TRAINING_AUX_RD_INTERVAL +#define DP_128B132B_TRAINING_AUX_RD_INTERVAL 0x2216 /* 2.0 */ +#endif + +/* + * v5.9-rc5-1031-g7d56927efac7 + * drm/dp: add a number of DP 2.0 DPCD definitions + */ +#ifndef DP_128B132B_SUPPORTED_LINK_RATES +#define DP_128B132B_SUPPORTED_LINK_RATES 0x2215 /* 2.0 */ +#endif + +/* + * v6.0-2085-gbdf4b00bee5d + * drm/display: Add missing Adaptive Sync DPCD definitions + */ +#ifndef DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1 +#define DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1 0x2214 /* 2.0 E11 */ +#endif + +/* + * v6.1-4885-g200199ae9a64 + * drm/amd/display: Adding support for VESA SCR + */ +#ifndef DP_EDP_PANEL_LUMINANCE_CONTROL_CAPABLE +#define DP_EDP_PANEL_LUMINANCE_CONTROL_CAPABLE (1 << 4) +#endif +#ifndef DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE +#define DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE (1 << 7) +#endif +#ifndef DP_EDP_PANEL_TARGET_LUMINANCE_VALUE +#define DP_EDP_PANEL_TARGET_LUMINANCE_VALUE 0x734 +#endif + +#endif /* _KCL_DRM_DP_HELPER_H_ */ diff --git a/include/kcl/kcl_drm_dsc_helper.h b/include/kcl/kcl_drm_dsc_helper.h new file mode 100644 index 0000000000000..0c61de575753d --- /dev/null +++ b/include/kcl/kcl_drm_dsc_helper.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef _KCL_KCL_DRM_DSC_HELPER_H +#define _KCL_KCL_DRM_DSC_HELPER_H + +#include +#include + +#ifndef HAVE_DRM_DSC_PPS_PAYLOAD_PACK +void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_sdp, + const struct drm_dsc_config *dsc_cfg); +#endif + +#ifndef HAVE_DRM_DSC_COMPUTE_RC_PARAMETERS +int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg); +#endif + +#endif /* _KCL_KCL_DRM_DSC_HELPER_H */ + diff --git a/include/kcl/kcl_drm_edid.h b/include/kcl/kcl_drm_edid.h new file mode 100644 index 0000000000000..05afadb754485 --- /dev/null +++ b/include/kcl/kcl_drm_edid.h @@ -0,0 +1,59 @@ +#ifndef AMDKCL_DRM_EDID_H +#define AMDKCL_DRM_EDID_H + +#include + +#ifndef drm_edid_encode_panel_id +#define drm_edid_encode_panel_id(vend_chr_0, vend_chr_1, vend_chr_2, product_id) \ + ((((u32)(vend_chr_0) - '@') & 0x1f) << 
26 | \ + (((u32)(vend_chr_1) - '@') & 0x1f) << 21 | \ + (((u32)(vend_chr_2) - '@') & 0x1f) << 16 | \ + ((product_id) & 0xffff)) +#endif /* drm_edid_encode_panel_id */ + +#ifndef DRM_EDID_RANGE_OFFSET_MIN_VFREQ +#define DRM_EDID_RANGE_OFFSET_MIN_VFREQ (1 << 0) /* 1.4 */ +#define DRM_EDID_RANGE_OFFSET_MAX_VFREQ (1 << 1) /* 1.4 */ +#define DRM_EDID_RANGE_OFFSET_MIN_HFREQ (1 << 2) /* 1.4 */ +#define DRM_EDID_RANGE_OFFSET_MAX_HFREQ (1 << 3) /* 1.4 */ +#endif + +#ifndef DRM_EDID_FEATURE_CONTINUOUS_FREQ +#define DRM_EDID_FEATURE_CONTINUOUS_FREQ (1 << 0) /* 1.4 */ +#endif + + +/* commit v5.18-rc5-1046-ge4ccf9a777d3 + drm/edid: add struct drm_edid container */ +#if !defined(HAVE_DRM_EDID_MALLOC) || !defined(HAVE_DRM_EDID_RAW) || !defined(HAVE_DRM_EDID_VALID) +struct drm_edid { + /* Size allocated for edid */ + size_t size; + const struct edid *edid; +}; +#endif + +#ifndef HAVE_DRM_EDID_MALLOC +const struct drm_edid *_kcl_drm_edid_alloc(const void *edid, size_t size); +void _kcl_drm_edid_free(const struct drm_edid *drm_edid); +#define drm_edid_alloc _kcl_drm_edid_alloc +#define drm_edid_free _kcl_drm_edid_free +#endif + +#ifndef HAVE_DRM_EDID_RAW +const struct edid *_kcl_drm_edid_raw(const struct drm_edid *drm_edid); +#define drm_edid_raw _kcl_drm_edid_raw +#endif + +#ifndef HAVE_DRM_EDID_VALID +static inline bool _kcl_drm_edid_valid(const struct drm_edid *drm_edid) +{ + if (!drm_edid) + return false; + + return drm_edid_is_valid(drm_edid->edid); +} +#define drm_edid_valid _kcl_drm_edid_valid +#endif + +#endif diff --git a/include/kcl/kcl_drm_exec.h b/include/kcl/kcl_drm_exec.h new file mode 100644 index 0000000000000..2ffba4ce2fef7 --- /dev/null +++ b/include/kcl/kcl_drm_exec.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#ifndef AMDKCL_DRM_EXEC_H +#define AMDKCL_DRM_EXEC_H + +#include +#include + +#ifdef HAVE_DRM_GEM_OBJECT_RESV +#define amdkcl_gem_resvp(bo) (bo->resv) +#else +#define amdkcl_gem_resvp(bo) (container_of(bo, struct ttm_buffer_object, base)->resv) +#endif +#ifndef HAVE_DRM_DRM_EXEC_H +#include +#include +#include +#define DRM_EXEC_INTERRUPTIBLE_WAIT BIT(0) +#define DRM_EXEC_IGNORE_DUPLICATES BIT(1) + +struct drm_gem_object; + +/** + * struct drm_exec - Execution context + */ +struct drm_exec { + /** + * @flags: Flags to control locking behavior + */ + uint32_t flags; + + /** + * @ticket: WW ticket used for acquiring locks + */ + struct ww_acquire_ctx ticket; + + /** + * @num_objects: number of objects locked + */ + unsigned int num_objects; + + /** + * @max_objects: maximum objects in array + */ + unsigned int max_objects; + + /** + * @objects: array of the locked objects + */ + struct drm_gem_object **objects; + + /** + * @contended: contended GEM object we backed off for + */ + struct drm_gem_object *contended; + + /** + * @prelocked: already locked GEM object due to contention + */ + struct drm_gem_object *prelocked; +}; + +/** + * drm_exec_for_each_locked_object - iterate over all the locked objects + * @exec: drm_exec object + * @index: unsigned long index for the iteration + * @obj: the current GEM object + * + * Iterate over all the locked GEM objects inside the drm_exec object. + */ +#define drm_exec_for_each_locked_object(exec, index, obj) \ + for (index = 0, obj = (exec)->objects[0]; \ + index < (exec)->num_objects; \ + ++index, obj = (exec)->objects[index]) + +/** + * drm_exec_until_all_locked - loop until all GEM objects are locked + * @exec: drm_exec object + * + * Core functionality of the drm_exec object. 
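+ *
+ * A typical invocation, sketched here for illustration only ('obj' stands
+ * for whatever GEM object the caller needs to lock, 'exec' was set up with
+ * drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT) and is torn down again
+ * with drm_exec_fini(&exec) afterwards):
+ *
+ *	drm_exec_until_all_locked(&exec) {
+ *		ret = drm_exec_prepare_obj(&exec, obj, 1);
+ *		drm_exec_retry_on_contention(&exec);
+ *		if (ret)
+ *			break;
+ *	}
+ *
+ * The macro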
loops until all GEM objects are
+ * locked and no more contention exists. At the beginning of the loop it is
+ * guaranteed that no GEM object is locked.
+ *
+ * Since labels can't be defined local to the loop's body we use a jump pointer
+ * to make sure that the retry is only used from within the loop's body.
+ */
+#define drm_exec_until_all_locked(exec)					\
+__PASTE(__drm_exec_, __LINE__):						\
+	for (void *__drm_exec_retry_ptr; ({				\
+		__drm_exec_retry_ptr = &&__PASTE(__drm_exec_, __LINE__);\
+		(void)__drm_exec_retry_ptr;				\
+		drm_exec_cleanup(exec);					\
+	});)
+
+/**
+ * drm_exec_retry_on_contention - restart the loop to grab all locks
+ * @exec: drm_exec object
+ *
+ * Control flow helper to continue when a contention was detected and we need to
+ * clean up and re-start the loop to prepare all GEM objects.
+ */
+#define drm_exec_retry_on_contention(exec)			\
+	do {							\
+		if (unlikely(drm_exec_is_contended(exec)))	\
+			goto *__drm_exec_retry_ptr;		\
+	} while (0)
+
+/**
+ * drm_exec_is_contended - check for contention
+ * @exec: drm_exec object
+ *
+ * Returns true if the drm_exec object has run into some contention while
+ * locking a GEM object and needs to clean up.
+ */
+static inline bool drm_exec_is_contended(struct drm_exec *exec)
+{
+	return !!exec->contended;
+}
+
+void drm_exec_init(struct drm_exec *exec, uint32_t flags);
+void drm_exec_fini(struct drm_exec *exec);
+bool drm_exec_cleanup(struct drm_exec *exec);
+int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
+void drm_exec_unlock_obj(struct drm_exec *exec, struct drm_gem_object *obj);
+int drm_exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj,
+			 unsigned int num_fences);
+int drm_exec_prepare_array(struct drm_exec *exec,
+			   struct drm_gem_object **objects,
+			   unsigned int num_objects,
+			   unsigned int num_fences);
+#endif
+#endif
diff --git a/include/kcl/kcl_drm_fb.h b/include/kcl/kcl_drm_fb.h
new file mode 100644
index 0000000000000..9c0341fca3043
--- /dev/null
+++ b/include/kcl/kcl_drm_fb.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2006-2009 Red Hat Inc.
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie
+ *
+ * DRM framebuffer helper functions
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Authors:
+ *	Dave Airlie
+ *	Jesse Barnes
+ */
+#ifndef KCL_KCL_DRM_FB_H
+#define KCL_KCL_DRM_FB_H
+
+#include
+#include
+#include
+#include
+
+/* Copied from include/drm/drm_fb_helper.h */
+/*
+ * Don't add fb_debug_* since the legacy drm_fb_helper_debug_* has segfault
+ * history:
+ * v2.6.35-21-gd219adc1228a fb: add hooks to handle KDB enter/exit
+ * v2.6.35-22-g1a7aba7f4e45 drm: add KGDB/KDB support
+ * v4.8-rc8-1391-g74064893901a drm/fb-helper: add DRM_FB_HELPER_DEFAULT_OPS for fb_ops
+ * v4.9-rc4-808-g1e0089288b9b drm/fb-helper: add fb_debug_* to DRM_FB_HELPER_DEFAULT_OPS
+ * v4.9-rc4-807-g1b99b72489c6 drm/fb-helper: fix segfaults in drm_fb_helper_debug_*
+ * v4.10-rc8-1367-g0f3bbe074dd1 drm/fb-helper: implement ioctl FBIO_WAITFORVSYNC
+ */
+#ifndef DRM_FB_HELPER_DEFAULT_OPS
+#define DRM_FB_HELPER_DEFAULT_OPS \
+	.fb_check_var	= drm_fb_helper_check_var, \
+	.fb_set_par	= drm_fb_helper_set_par, \
+	.fb_setcmap	= drm_fb_helper_setcmap, \
+	.fb_blank	= drm_fb_helper_blank, \
+	.fb_pan_display	= drm_fb_helper_pan_display
+#endif
+
+#ifndef HAVE_DRM_FB_HELPER_FILL_INFO
+void drm_fb_helper_fill_info(struct fb_info *info,
+			     struct drm_fb_helper *fb_helper,
+			     struct drm_fb_helper_surface_size *sizes);
+#endif
+
+#endif
diff --git a/include/kcl/kcl_drm_fourcc.h b/include/kcl/kcl_drm_fourcc.h
new file mode 100644
index 0000000000000..cc36737aafae0
--- /dev/null
+++ b/include/kcl/kcl_drm_fourcc.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef KCL_KCL_DRM_FOURCC_H
+#define KCL_KCL_DRM_FOURCC_H
+
+#include
+
+/* Copied from include/uapi/drm/drm_fourcc.h */
+/*
+ * Linear Layout
+ *
+ * Just plain linear layout. Note that this is different from not specifying any
+ * modifier (e.g. not setting DRM_MODE_FB_MODIFIERS in the DRM_ADDFB2 ioctl),
+ * which tells the driver to also take driver-internal information into account
+ * and so might actually result in a tiled framebuffer.
+ */
+#if !defined(DRM_FORMAT_MOD_VENDOR_NONE)
+#define DRM_FORMAT_MOD_VENDOR_NONE 0
+#endif
+
+#if !defined(DRM_FORMAT_MOD_LINEAR)
+#define DRM_FORMAT_MOD_LINEAR fourcc_mod_code(NONE, 0)
+#endif
+
+#if !defined(DRM_FORMAT_RESERVED)
+#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1)
+#endif
+/*
+ * Invalid Modifier
+ *
+ * This modifier can be used as a sentinel to terminate the format modifiers
+ * list, or to initialize a variable with an invalid modifier. It might also be
+ * used to report an error back to userspace for certain APIs.
+ */
+#if !defined(DRM_FORMAT_MOD_INVALID)
+#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED)
+#endif
+
+/*
+ * AMD modifiers
+ *
+ * Memory layout:
+ *
+ * without DCC:
+ *   - main surface
+ *
+ * with DCC & without DCC_RETILE:
+ *   - main surface in plane 0
+ *   - DCC surface in plane 1 (RB-aligned, pipe-aligned if DCC_PIPE_ALIGN is set)
+ *
+ * with DCC & DCC_RETILE:
+ *   - main surface in plane 0
+ *   - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned)
+ *   - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned)
+ *
+ * For multi-plane formats the above surfaces get merged into one plane for
+ * each format plane, based on the required alignment only.
+ *
+ * Bits  Parameter                Notes
+ * ----- ------------------------ ---------------------------------------------
+ *
+ *   7:0 TILE_VERSION             Values are AMD_FMT_MOD_TILE_VER_*
+ *  12:8 TILE                     Values are AMD_FMT_MOD_TILE_<version>_*
+ *    13 DCC
+ *    14 DCC_RETILE
+ *    15 DCC_PIPE_ALIGN
+ *    16 DCC_INDEPENDENT_64B
+ *    17 DCC_INDEPENDENT_128B
+ * 19:18 DCC_MAX_COMPRESSED_BLOCK Values are AMD_FMT_MOD_DCC_BLOCK_*
+ *    20 DCC_CONSTANT_ENCODE
+ * 23:21 PIPE_XOR_BITS            Only for some chips
+ * 26:24 BANK_XOR_BITS            Only for some chips
+ * 29:27 PACKERS                  Only for some chips
+ * 32:30 RB                       Only for some chips
+ * 35:33 PIPE                     Only for some chips
+ * 55:36 -                        Reserved for future use, must be zero
+ */
+
+#ifndef AMD_FMT_MOD_TILE_VER_GFX11
+#define AMD_FMT_MOD_TILE_VER_GFX11 4
+#endif
+
+#ifndef AMD_FMT_MOD_TILE_VER_GFX12
+#define AMD_FMT_MOD_TILE_VER_GFX12 5
+#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
+#endif
+
+#ifndef AMD_FMT_MOD_TILE_GFX11_256K_R_X
+#define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31
+#endif
+
+#if !defined(AMD_FMT_MOD)
+#define AMD_FMT_MOD fourcc_mod_code(AMD, 0)
+
+#define IS_AMD_FMT_MOD(val) (((val) >> 56) == DRM_FORMAT_MOD_VENDOR_AMD)
+
+/* Reserve 0 for GFX8 and older */
+#define AMD_FMT_MOD_TILE_VER_GFX9 1
+#define AMD_FMT_MOD_TILE_VER_GFX10 2
+#define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3
+
+/*
+ * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical
+ * version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_S 9
+
+/*
+ * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has
+ * GFX9 as canonical version.
+ */
+#define AMD_FMT_MOD_TILE_GFX9_64K_D 10
+#define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
+#define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26
+#define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
+
+#define AMD_FMT_MOD_DCC_BLOCK_64B 0
+#define AMD_FMT_MOD_DCC_BLOCK_128B 1
+#define AMD_FMT_MOD_DCC_BLOCK_256B 2
+
+#define AMD_FMT_MOD_TILE_VERSION_SHIFT 0
+#define AMD_FMT_MOD_TILE_VERSION_MASK 0xFF
+#define AMD_FMT_MOD_TILE_SHIFT 8
+#define AMD_FMT_MOD_TILE_MASK 0x1F
+
+/* Whether DCC compression is enabled. */
+#define AMD_FMT_MOD_DCC_SHIFT 13
+#define AMD_FMT_MOD_DCC_MASK 0x1
+
+/*
+ * Whether to include two DCC surfaces, one which is rb & pipe aligned, and
+ * one which is not-aligned.
+ */ +#define AMD_FMT_MOD_DCC_RETILE_SHIFT 14 +#define AMD_FMT_MOD_DCC_RETILE_MASK 0x1 + +/* Only set if DCC_RETILE = false */ +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_SHIFT 15 +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_MASK 0x1 + +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_SHIFT 16 +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 + +/* + * DCC supports embedding some clear colors directly in the DCC surface. + * However, on older GPUs the rendering HW ignores the embedded clear color + * and prefers the driver provided color. This necessitates doing a fastclear + * eliminate operation before a process transfers control. + * + * If this bit is set that means the fastclear eliminate is not needed for these + * embeddable colors. + */ +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 20 +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1 + +/* + * The below fields are for accounting for per GPU differences. These are only + * relevant for GFX9 and later and if the tile field is *_X/_T. + * + * PIPE_XOR_BITS = always needed + * BANK_XOR_BITS = only for TILE_VER_GFX9 + * PACKERS = only for TILE_VER_GFX10_RBPLUS + * RB = only for TILE_VER_GFX9 & DCC + * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN) + */ +#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 21 +#define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24 +#define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_PACKERS_SHIFT 27 +#define AMD_FMT_MOD_PACKERS_MASK 0x7 +#define AMD_FMT_MOD_RB_SHIFT 30 +#define AMD_FMT_MOD_RB_MASK 0x7 +#define AMD_FMT_MOD_PIPE_SHIFT 33 +#define AMD_FMT_MOD_PIPE_MASK 0x7 + +#define AMD_FMT_MOD_SET(field, value) \ + ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) +#define AMD_FMT_MOD_GET(field, value) \ + (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) +#define AMD_FMT_MOD_CLEAR(field) \ + (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) +#endif + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#ifndef DRM_FORMAT_P010 +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel */ +#endif +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#ifndef DRM_FORMAT_XRGB16161616F +#define DRM_FORMAT_XRGB16161616F fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616F fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ +#endif + +#ifndef DRM_FORMAT_ARGB16161616 +#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ +#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#endif + +#ifndef AMD_FMT_MOD_TILE_GFX12_256B_2D +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 
+#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 +#endif + +#ifndef AMD_FMT_MOD_TILE_GFX12_256K_2D +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 +#endif + +#endif /* KCL_KCL_DRM_FOURCC_H */ diff --git a/include/kcl/kcl_drm_gem.h b/include/kcl/kcl_drm_gem.h new file mode 100644 index 0000000000000..73518a8a22f8b --- /dev/null +++ b/include/kcl/kcl_drm_gem.h @@ -0,0 +1,68 @@ +/* + * GEM Graphics Execution Manager Driver Interfaces + * + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * Copyright (c) 2009-2010, Code Aurora Forum. + * All rights reserved. + * Copyright © 2014 Intel Corporation + * Daniel Vetter + * + * Author: Rickard E. (Rik) Faith + * Author: Gareth Hughes + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KCL_KCL_DRM_GEM_H__ +#define __KCL_KCL_DRM_GEM_H__ + +#include +#if defined(HAVE_DRM_GEM_OBJECT_PUT) +#if defined(HAVE_DRM_GEM_OBJECT_PUT_SYMBOL) +static inline void +_kcl_drm_gem_object_put(struct drm_gem_object *obj) +{ + return drm_gem_object_put_unlocked(obj); +} +#endif +#else +static inline void +drm_gem_object_put(struct drm_gem_object *obj) +{ + return drm_gem_object_unreference_unlocked(obj); +} + +static inline void +drm_gem_object_get(struct drm_gem_object *obj) +{ + kref_get(&obj->refcount); +} +#endif /* HAVE_DRM_GEM_OBJECT_PUT */ + +/* copy from include/drm/drm_gem.h */ +#ifndef HAVE_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS +static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_object *obj) +{ + return (obj->handle_count > 1) || obj->dma_buf; +} +#endif /* HAVE_DRM_GEM_OBJECT_IS_SHARED_FOR_MEMORY_STATS */ + +#endif diff --git a/include/kcl/kcl_drm_hdcp.h b/include/kcl/kcl_drm_hdcp.h new file mode 100644 index 0000000000000..76c7823fe6f88 --- /dev/null +++ b/include/kcl/kcl_drm_hdcp.h @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2017 Google, Inc. 
+ * + * Authors: + * Sean Paul + */ +#ifndef AMDKCL_DRM_HDCP_H +#define AMDKCL_DRM_HDCP_H + +#ifdef CONFIG_DRM_AMD_DC_HDCP +#include +#include + +/* changed in v4.16-rc7-1717-gb8e47d87be65 + * drm: Fix HDCP downstream dev count read + */ +#ifdef DRM_HDCP_NUM_DOWNSTREAM +#undef DRM_HDCP_NUM_DOWNSTREAM +#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x7f) +#endif + +/* introduced in v5.3-rc1-377-g7672dbba85d3 + * drm: Add Content protection type property + */ +#ifndef DRM_MODE_HDCP_CONTENT_TYPE0 +#define DRM_MODE_HDCP_CONTENT_TYPE0 0 +#define DRM_MODE_HDCP_CONTENT_TYPE1 1 +#endif + +/* introduced in v4.19-rc2-1221-gaf5aad059885 + * drm: hdcp2.2 authentication msg definitions + */ +#ifndef DRM_HDCP_1_4_SRM_ID +#define DRM_HDCP_1_4_SRM_ID 0x8 +#define DRM_HDCP_1_4_VRL_LENGTH_SIZE 3 +#define DRM_HDCP_1_4_DCP_SIG_SIZE 40 + +/* Protocol message definition for HDCP2.2 specification */ +/* + * Protected content streams are classified into 2 types: + * - Type0: Can be transmitted with HDCP 1.4+ + * - Type1: Can be transmitted with HDCP 2.2+ + */ +#define HDCP_STREAM_TYPE0 0x00 +#define HDCP_STREAM_TYPE1 0x01 + +/* introduced in v4.15-rc4-1351-g495eb7f877ab + * drm: Add some HDCP related #defines + */ +#ifndef DRM_HDCP_KSV_LEN +#define DRM_HDCP_KSV_LEN 5 +#endif + +/* HDCP2.2 Msg IDs */ +#define HDCP_2_2_NULL_MSG 1 +#define HDCP_2_2_AKE_INIT 2 +#define HDCP_2_2_AKE_SEND_CERT 3 +#define HDCP_2_2_AKE_NO_STORED_KM 4 +#define HDCP_2_2_AKE_STORED_KM 5 +#define HDCP_2_2_AKE_SEND_HPRIME 7 +#define HDCP_2_2_AKE_SEND_PAIRING_INFO 8 +#define HDCP_2_2_LC_INIT 9 +#define HDCP_2_2_LC_SEND_LPRIME 10 +#define HDCP_2_2_SKE_SEND_EKS 11 +#define HDCP_2_2_REP_SEND_RECVID_LIST 12 +#define HDCP_2_2_REP_SEND_ACK 15 +#define HDCP_2_2_REP_STREAM_MANAGE 16 +#define HDCP_2_2_REP_STREAM_READY 17 +#define HDCP_2_2_ERRATA_DP_STREAM_TYPE 50 + +#define HDCP_2_2_RTX_LEN 8 +#define HDCP_2_2_RRX_LEN 8 + +#define HDCP_2_2_K_PUB_RX_MOD_N_LEN 128 +#define HDCP_2_2_K_PUB_RX_EXP_E_LEN 3 +#define HDCP_2_2_K_PUB_RX_LEN (HDCP_2_2_K_PUB_RX_MOD_N_LEN + \ + HDCP_2_2_K_PUB_RX_EXP_E_LEN) + +#define HDCP_2_2_DCP_LLC_SIG_LEN 384 + +#define HDCP_2_2_E_KPUB_KM_LEN 128 +#define HDCP_2_2_E_KH_KM_M_LEN (16 + 16) +#define HDCP_2_2_H_PRIME_LEN 32 +#define HDCP_2_2_E_KH_KM_LEN 16 +#define HDCP_2_2_RN_LEN 8 +#define HDCP_2_2_L_PRIME_LEN 32 +#define HDCP_2_2_E_DKEY_KS_LEN 16 +#define HDCP_2_2_RIV_LEN 8 +#define HDCP_2_2_SEQ_NUM_LEN 3 +#define HDCP_2_2_V_PRIME_HALF_LEN (HDCP_2_2_L_PRIME_LEN / 2) +#define HDCP_2_2_RECEIVER_ID_LEN DRM_HDCP_KSV_LEN +#define HDCP_2_2_MAX_DEVICE_COUNT 31 +#define HDCP_2_2_RECEIVER_IDS_MAX_LEN (HDCP_2_2_RECEIVER_ID_LEN * \ + HDCP_2_2_MAX_DEVICE_COUNT) +#define HDCP_2_2_MPRIME_LEN 32 + +/* Following Macros take a byte at a time for bit(s) masking */ +/* + * TODO: This has to be changed for DP MST, as multiple stream on + * same port is possible. + * For HDCP2.2 on HDMI and DP SST this value is always 1. 
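To make the byte-at-a-time masking concrete, here is a hedged sketch of recovering the repeater topology from the two RxInfo bytes of a receiver-ID-list message, using the bit helpers defined just below (rx_info[0] is the most significant byte per the HDCP 2.2 spec; the function name is illustrative):

static int example_parse_rx_info(const u8 rx_info[HDCP_2_2_RXINFO_LEN],
				 u8 *dev_count, u8 *depth)
{
	/* Reject topologies the spec flags as over-limit. */
	if (HDCP_2_2_MAX_DEVS_EXCEEDED(rx_info[1]) ||
	    HDCP_2_2_MAX_CASCADE_EXCEEDED(rx_info[1]))
		return -EINVAL;

	/* The device count is split across the two bytes. */
	*dev_count = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4) |
		     HDCP_2_2_DEV_COUNT_LO(rx_info[1]);
	*depth = HDCP_2_2_DEPTH(rx_info[0]);
	return 0;
}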
+ */ +#define HDCP_2_2_MAX_CONTENT_STREAMS_CNT 1 +#define HDCP_2_2_TXCAP_MASK_LEN 2 +#define HDCP_2_2_RXCAPS_LEN 3 +#define HDCP_2_2_RX_REPEATER(x) ((x) & BIT(0)) +#define HDCP_2_2_DP_HDCP_CAPABLE(x) ((x) & BIT(1)) +#define HDCP_2_2_RXINFO_LEN 2 + +/* HDCP1.x compliant device in downstream */ +#define HDCP_2_2_HDCP1_DEVICE_CONNECTED(x) ((x) & BIT(0)) + +/* HDCP2.0 Compliant repeater in downstream */ +#define HDCP_2_2_HDCP_2_0_REP_CONNECTED(x) ((x) & BIT(1)) +#define HDCP_2_2_MAX_CASCADE_EXCEEDED(x) ((x) & BIT(2)) +#define HDCP_2_2_MAX_DEVS_EXCEEDED(x) ((x) & BIT(3)) +#define HDCP_2_2_DEV_COUNT_LO(x) (((x) & (0xF << 4)) >> 4) +#define HDCP_2_2_DEV_COUNT_HI(x) ((x) & BIT(0)) +#define HDCP_2_2_DEPTH(x) (((x) & (0x7 << 1)) >> 1) + +struct hdcp2_cert_rx { + u8 receiver_id[HDCP_2_2_RECEIVER_ID_LEN]; + u8 kpub_rx[HDCP_2_2_K_PUB_RX_LEN]; + u8 reserved[2]; + u8 dcp_signature[HDCP_2_2_DCP_LLC_SIG_LEN]; +} __packed; + +struct hdcp2_streamid_type { + u8 stream_id; + u8 stream_type; +} __packed; + +/* + * The TxCaps field specified in the HDCP HDMI, DP specs + * This field is big endian as specified in the errata. + */ +struct hdcp2_tx_caps { + /* Transmitter must set this to 0x2 */ + u8 version; + + /* Reserved for HDCP and DP Spec. Read as Zero */ + u8 tx_cap_mask[HDCP_2_2_TXCAP_MASK_LEN]; +} __packed; + +/* Main structures for HDCP2.2 protocol communication */ +struct hdcp2_ake_init { + u8 msg_id; + u8 r_tx[HDCP_2_2_RTX_LEN]; + struct hdcp2_tx_caps tx_caps; +} __packed; + +struct hdcp2_ake_send_cert { + u8 msg_id; + struct hdcp2_cert_rx cert_rx; + u8 r_rx[HDCP_2_2_RRX_LEN]; + u8 rx_caps[HDCP_2_2_RXCAPS_LEN]; +} __packed; + +struct hdcp2_ake_no_stored_km { + u8 msg_id; + u8 e_kpub_km[HDCP_2_2_E_KPUB_KM_LEN]; +} __packed; + +struct hdcp2_ake_stored_km { + u8 msg_id; + u8 e_kh_km_m[HDCP_2_2_E_KH_KM_M_LEN]; +} __packed; + +struct hdcp2_ake_send_hprime { + u8 msg_id; + u8 h_prime[HDCP_2_2_H_PRIME_LEN]; +} __packed; + +struct hdcp2_ake_send_pairing_info { + u8 msg_id; + u8 e_kh_km[HDCP_2_2_E_KH_KM_LEN]; +} __packed; + +struct hdcp2_lc_init { + u8 msg_id; + u8 r_n[HDCP_2_2_RN_LEN]; +} __packed; + +struct hdcp2_lc_send_lprime { + u8 msg_id; + u8 l_prime[HDCP_2_2_L_PRIME_LEN]; +} __packed; + +struct hdcp2_ske_send_eks { + u8 msg_id; + u8 e_dkey_ks[HDCP_2_2_E_DKEY_KS_LEN]; + u8 riv[HDCP_2_2_RIV_LEN]; +} __packed; + +struct hdcp2_rep_send_receiverid_list { + u8 msg_id; + u8 rx_info[HDCP_2_2_RXINFO_LEN]; + u8 seq_num_v[HDCP_2_2_SEQ_NUM_LEN]; + u8 v_prime[HDCP_2_2_V_PRIME_HALF_LEN]; + u8 receiver_ids[HDCP_2_2_RECEIVER_IDS_MAX_LEN]; +} __packed; + +struct hdcp2_rep_send_ack { + u8 msg_id; + u8 v[HDCP_2_2_V_PRIME_HALF_LEN]; +} __packed; + +struct hdcp2_rep_stream_manage { + u8 msg_id; + u8 seq_num_m[HDCP_2_2_SEQ_NUM_LEN]; + __be16 k; + struct hdcp2_streamid_type streams[HDCP_2_2_MAX_CONTENT_STREAMS_CNT]; +} __packed; + +struct hdcp2_rep_stream_ready { + u8 msg_id; + u8 m_prime[HDCP_2_2_MPRIME_LEN]; +} __packed; + +struct hdcp2_dp_errata_stream_type { + u8 msg_id; + u8 stream_type; +} __packed; +#endif /* DRM_HDCP_1_4_SRM_ID */ + +/* introduced in v4.19-rc2-1222-g8b44fefee694 + * drm: HDMI and DP specific HDCP2.2 defines + */ +#ifndef HDCP_2_2_CERT_TIMEOUT_MS +/* HDCP2.2 TIMEOUTs in mSec */ +#define HDCP_2_2_CERT_TIMEOUT_MS 100 +#define HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS 1000 +#define HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS 200 +#define HDCP_2_2_PAIRING_TIMEOUT_MS 200 +#define HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS 20 +#define HDCP_2_2_DP_LPRIME_TIMEOUT_MS 7 +#define HDCP_2_2_RECVID_LIST_TIMEOUT_MS 3000 +#define 
HDCP_2_2_STREAM_READY_TIMEOUT_MS 100 + +/* HDMI HDCP2.2 Register Offsets */ +#define HDCP_2_2_HDMI_REG_VER_OFFSET 0x50 +#define HDCP_2_2_HDMI_REG_WR_MSG_OFFSET 0x60 +#define HDCP_2_2_HDMI_REG_RXSTATUS_OFFSET 0x70 +#define HDCP_2_2_HDMI_REG_RD_MSG_OFFSET 0x80 +#define HDCP_2_2_HDMI_REG_DBG_OFFSET 0xC0 + +#define HDCP_2_2_HDMI_SUPPORT_MASK BIT(2) +#define HDCP_2_2_RX_CAPS_VERSION_VAL 0x02 +#define HDCP_2_2_SEQ_NUM_MAX 0xFFFFFF +#define HDCP_2_2_DELAY_BEFORE_ENCRYPTION_EN 200 + +/* Below macros take a byte at a time and mask the bit(s) */ +#define HDCP_2_2_HDMI_RXSTATUS_LEN 2 +#define HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(x) ((x) & 0x3) +#define HDCP_2_2_HDMI_RXSTATUS_READY(x) ((x) & BIT(2)) +#define HDCP_2_2_HDMI_RXSTATUS_REAUTH_REQ(x) ((x) & BIT(3)) +/* DP HDCP2.2 parameter offsets in DPCD address space */ +#define DP_HDCP_2_2_REG_RTX_OFFSET 0x69000 +#define DP_HDCP_2_2_REG_TXCAPS_OFFSET 0x69008 +#define DP_HDCP_2_2_REG_CERT_RX_OFFSET 0x6900B +#define DP_HDCP_2_2_REG_RRX_OFFSET 0x69215 +#define DP_HDCP_2_2_REG_RX_CAPS_OFFSET 0x6921D +#define DP_HDCP_2_2_REG_EKPUB_KM_OFFSET 0x69220 +#define DP_HDCP_2_2_REG_EKH_KM_WR_OFFSET 0x692A0 +#define DP_HDCP_2_2_REG_M_OFFSET 0x692B0 +#define DP_HDCP_2_2_REG_HPRIME_OFFSET 0x692C0 +#define DP_HDCP_2_2_REG_EKH_KM_RD_OFFSET 0x692E0 +#define DP_HDCP_2_2_REG_RN_OFFSET 0x692F0 +#define DP_HDCP_2_2_REG_LPRIME_OFFSET 0x692F8 +#define DP_HDCP_2_2_REG_EDKEY_KS_OFFSET 0x69318 +#define DP_HDCP_2_2_REG_RIV_OFFSET 0x69328 +#define DP_HDCP_2_2_REG_RXINFO_OFFSET 0x69330 +#define DP_HDCP_2_2_REG_SEQ_NUM_V_OFFSET 0x69332 +#define DP_HDCP_2_2_REG_VPRIME_OFFSET 0x69335 +#define DP_HDCP_2_2_REG_RECV_ID_LIST_OFFSET 0x69345 +#define DP_HDCP_2_2_REG_V_OFFSET 0x693E0 +#define DP_HDCP_2_2_REG_SEQ_NUM_M_OFFSET 0x693F0 +#define DP_HDCP_2_2_REG_K_OFFSET 0x693F3 +#define DP_HDCP_2_2_REG_STREAM_ID_TYPE_OFFSET 0x693F5 +#define DP_HDCP_2_2_REG_MPRIME_OFFSET 0x69473 +#define DP_HDCP_2_2_REG_RXSTATUS_OFFSET 0x69493 +#define DP_HDCP_2_2_REG_STREAM_TYPE_OFFSET 0x69494 +#define DP_HDCP_2_2_REG_DBG_OFFSET 0x69518 + +/* DP HDCP message start offsets in DPCD address space */ +#define DP_HDCP_2_2_AKE_INIT_OFFSET DP_HDCP_2_2_REG_RTX_OFFSET +#define DP_HDCP_2_2_AKE_SEND_CERT_OFFSET DP_HDCP_2_2_REG_CERT_RX_OFFSET +#define DP_HDCP_2_2_AKE_NO_STORED_KM_OFFSET DP_HDCP_2_2_REG_EKPUB_KM_OFFSET +#define DP_HDCP_2_2_AKE_STORED_KM_OFFSET DP_HDCP_2_2_REG_EKH_KM_WR_OFFSET +#define DP_HDCP_2_2_AKE_SEND_HPRIME_OFFSET DP_HDCP_2_2_REG_HPRIME_OFFSET +#define DP_HDCP_2_2_AKE_SEND_PAIRING_INFO_OFFSET \ + DP_HDCP_2_2_REG_EKH_KM_RD_OFFSET +#define DP_HDCP_2_2_LC_INIT_OFFSET DP_HDCP_2_2_REG_RN_OFFSET +#define DP_HDCP_2_2_LC_SEND_LPRIME_OFFSET DP_HDCP_2_2_REG_LPRIME_OFFSET +#define DP_HDCP_2_2_SKE_SEND_EKS_OFFSET DP_HDCP_2_2_REG_EDKEY_KS_OFFSET +#define DP_HDCP_2_2_REP_SEND_RECVID_LIST_OFFSET DP_HDCP_2_2_REG_RXINFO_OFFSET +#define DP_HDCP_2_2_REP_SEND_ACK_OFFSET DP_HDCP_2_2_REG_V_OFFSET +#define DP_HDCP_2_2_REP_STREAM_MANAGE_OFFSET DP_HDCP_2_2_REG_SEQ_NUM_M_OFFSET +#define DP_HDCP_2_2_REP_STREAM_READY_OFFSET DP_HDCP_2_2_REG_MPRIME_OFFSET + +#define HDCP_2_2_DP_RXSTATUS_LEN 1 +#define HDCP_2_2_DP_RXSTATUS_READY(x) ((x) & BIT(0)) +#define HDCP_2_2_DP_RXSTATUS_H_PRIME(x) ((x) & BIT(1)) +#define HDCP_2_2_DP_RXSTATUS_PAIRING(x) ((x) & BIT(2)) +#define HDCP_2_2_DP_RXSTATUS_REAUTH_REQ(x) ((x) & BIT(3)) +#define HDCP_2_2_DP_RXSTATUS_LINK_FAILED(x) ((x) & BIT(4)) +#endif /* HDCP_2_2_CERT_TIMEOUT_MS */ + +#ifndef HAVE_DRM_HDCP_UPDATE_CONTENT_PROTECTION +void _kcl_drm_hdcp_update_content_protection(struct drm_connector *connector, + 
u64 val); +static inline +void drm_hdcp_update_content_protection(struct drm_connector *connector, + u64 val) +{ + _kcl_drm_hdcp_update_content_protection(connector, val); +} +#endif /* HAVE_DRM_HDCP_UPDATE_CONTENT_PROTECTION */ + +#endif /* CONFIG_DRM_AMD_DC_HDCP */ + +#endif /* AMDKCL_DRM_HDCP_H */ diff --git a/include/kcl/kcl_drm_modes.h b/include/kcl/kcl_drm_modes.h new file mode 100644 index 0000000000000..d3a387bad9988 --- /dev/null +++ b/include/kcl/kcl_drm_modes.h @@ -0,0 +1,36 @@ +/* + * Copyright © 2006 Keith Packard + * Copyright © 2007-2008 Dave Airlie + * Copyright © 2007-2008 Intel Corporation + * Jesse Barnes + * Copyright © 2014 Intel Corporation + * Daniel Vetter + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
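The drm_hdcp_update_content_protection() wrapper above is an instance of the compatibility pattern used throughout these headers: a configure-generated HAVE_* macro records whether the running kernel already provides a symbol, and when it does not, a static inline with the upstream name forwards to a _kcl_-prefixed backport shipped with the module. A minimal sketch of the pattern with made-up names (HAVE_FOO_BAR, foo_bar() and _kcl_foo_bar() do not exist):

#ifndef HAVE_FOO_BAR
int _kcl_foo_bar(int arg);	/* backported implementation in the KCL sources */

static inline int foo_bar(int arg)
{
	/* Callers use the upstream name unconditionally. */
	return _kcl_foo_bar(arg);
}
#endif /* HAVE_FOO_BAR */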
+ */ +#ifndef KCL_KCL_DRM_MODES_H +#define KCL_KCL_DRM_MODES_H + +#include + +#ifndef HAVE_DRM_MODE_INIT +void drm_mode_init(struct drm_display_mode *dst, const struct drm_display_mode *src); +#endif + +#endif diff --git a/include/kcl/kcl_drm_modeset_lock.h b/include/kcl/kcl_drm_modeset_lock.h new file mode 100644 index 0000000000000..009e4af7a4c00 --- /dev/null +++ b/include/kcl/kcl_drm_modeset_lock.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_DRM_MODESET_LOCK_H_H_ +#define _KCL_KCL_DRM_MODESET_LOCK_H_H_ + +#include /* stackdepot.h is not self-contained */ +#include + +#ifndef DRM_MODESET_ACQUIRE_INTERRUPTIBLE +#define DRM_MODESET_ACQUIRE_INTERRUPTIBLE BIT(0) +#endif + +#endif diff --git a/include/kcl/kcl_drm_plane_helper.h b/include/kcl/kcl_drm_plane_helper.h new file mode 100644 index 0000000000000..6b3798e0da151 --- /dev/null +++ b/include/kcl/kcl_drm_plane_helper.h @@ -0,0 +1,14 @@ +#ifndef AMDKCL_DRM_PLANE_HELPER_H +#define AMDKCL_DRM_PLANE_HELPER_H + +#include + +#ifndef HAVE_DRM_PLANE_HELPER_DESTROY +static inline void kcl_drm_plane_helper_destroy(struct drm_plane *plane) +{ + drm_plane_cleanup(plane); + kfree(plane); +} +#define drm_plane_helper_destroy kcl_drm_plane_helper_destroy +#endif +#endif \ No newline at end of file diff --git a/include/kcl/kcl_drm_prime.h b/include/kcl/kcl_drm_prime.h new file mode 100644 index 0000000000000..c55e8d05c0318 --- /dev/null +++ b/include/kcl/kcl_drm_prime.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __KCL_DRM_PRIME_H__ +#define __KCL_DRM_PRIME_H__ + +#include +#include +#include + +#ifndef HAVE_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY +static inline +int drm_prime_sg_to_dma_addr_array(struct sg_table *sgt, dma_addr_t *addrs, + int max_entries) +{ + return drm_prime_sg_to_page_addr_arrays(sgt, NULL, addrs, max_entries); +} +#endif /* HAVE_DRM_PRIME_SG_TO_DMA_ADDR_ARRAY */ +#endif diff --git a/include/kcl/kcl_drm_print.h b/include/kcl/kcl_drm_print.h new file mode 100644 index 0000000000000..0506b7a41f121 --- /dev/null +++ b/include/kcl/kcl_drm_print.h @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2016 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Rob Clark + */ +#ifndef AMDKCL_DRM_PRINT_H +#define AMDKCL_DRM_PRINT_H + +#include +#include +#include + +#ifndef _DRM_PRINTK +#define _DRM_PRINTK(once, level, fmt, ...) 
\ + do { \ + printk##once(KERN_##level "[" DRM_NAME "] " fmt, \ + ##__VA_ARGS__); \ + } while (0) +#endif + +#ifndef DRM_WARN +#define DRM_WARN(fmt, ...) \ + _DRM_PRINTK(, WARNING, fmt, ##__VA_ARGS__) +#endif + +#ifndef DRM_WARN_ONCE +#define DRM_WARN_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, WARNING, fmt, ##__VA_ARGS__) +#endif + +#ifndef drm_WARN_ONCE +#define drm_WARN_ONCE(drm, condition, format, arg...) \ + WARN_ONCE(condition, "%s %s: " format, \ + dev_driver_string((drm)->dev), \ + dev_name((drm)->dev), ## arg) +#endif + +#ifndef drm_WARN +#define drm_WARN(drm, condition, format, arg...) \ + WARN(condition, "%s %s: " format, \ + dev_driver_string((drm)->dev), \ + dev_name((drm)->dev), ## arg) +#endif + +#ifndef drm_WARN_ON +#define drm_WARN_ON(drm, x) \ + drm_WARN((drm), (x), "%s", \ + "drm_WARN_ON(" __stringify(x) ")") +#endif + +#ifndef DRM_NOTE +#define DRM_NOTE(fmt, ...) \ + _DRM_PRINTK(, NOTICE, fmt, ##__VA_ARGS__) +#endif + +#ifndef DRM_NOTE_ONCE +#define DRM_NOTE_ONCE(fmt, ...) \ + _DRM_PRINTK(_once, NOTICE, fmt, ##__VA_ARGS__) +#endif + +#ifndef drm_err +#define drm_err(drm, fmt, ...) \ + dev_err((drm)->dev, "[drm] " fmt, ##__VA_ARGS__) + +__printf(1, 2) +void kcl_drm_err(const char *format, ...); + +#undef DRM_ERROR +#define DRM_ERROR(fmt, ...) \ + kcl_drm_err(fmt, ##__VA_ARGS__) + +#else +#define HAVE_DRM_ERR_MACRO +#endif /* drm_err */ + +#ifndef drm_warn +#define drm_warn(drm, fmt, ...) \ + dev_warn((drm)->dev, "[drm] " fmt, ##__VA_ARGS__) +#endif /* drm_warn */ + +#ifndef drm_warn_once +#define drm_warn_once(drm, fmt, ...) \ + dev_warn_once((drm)->dev, "[drm] " fmt, ##__VA_ARGS__) +#endif /* drm_warn_once */ + +#if !defined(DRM_UT_VBL) +#define DRM_UT_VBL 0x20 +#endif + +#if !defined(DRM_DEV_DEBUG) +#define DRM_DEV_DEBUG(dev, fmt, ...) \ + DRM_DEBUG(fmt, ##__VA_ARGS__) +#endif + +#if !defined(DRM_DEV_ERROR) +#define DRM_DEV_ERROR(dev, fmt, ...) \ + DRM_ERROR(fmt, ##__VA_ARGS__) +#endif + +#ifndef DRM_DEV_INFO +/* NOTE: this is deprecated in favor of drm_info() or dev_info(). */ +#define DRM_DEV_INFO(dev, fmt, ...) \ + DRM_INFO(fmt, ##__VA_ARGS__) +#endif + +#ifndef DRM_DEBUG_VBL +#define DRM_DEBUG_VBL(fmt, args...) \ + do { \ + if (unlikely(drm_debug & DRM_UT_VBL)) \ + drm_ut_debug_printk(__func__, fmt, ##args); \ + } while (0) +#endif + +#if !defined(drm_dbg_atomic) +#define drm_dbg_atomic(drm, fmt, ...) \ + drm_dev_dbg((drm)->dev, DRM_UT_ATOMIC, fmt, ##__VA_ARGS__) +#endif + +#if !defined(drm_dbg_state) +#define drm_dbg_state(drm, fmt, ...) \ + drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_STATE, fmt, ##__VA_ARGS__) +#endif + +#if !defined(drm_dbg_vbl) +#define drm_dbg_vbl(drm, fmt, ...) \ + drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_VBL, fmt, ##__VA_ARGS__) +#endif + +#if !defined(drm_dbg_kms) +#define drm_dbg_kms(drm, fmt, ...) \ + drm_dev_dbg((drm)->dev, 0x04, fmt, ##__VA_ARGS__) +#endif + +#if !defined(drm_dbg_dp) +#define drm_dbg_dp(drm, fmt, ...) \ + drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_UT_DP, fmt, ##__VA_ARGS__) +#endif + +#if !defined(drm_dbg_driver) +#define drm_dbg_driver(drm, fmt, ...) \ + drm_dev_dbg((drm) ? 
(drm)->dev : NULL, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) +#endif + +#ifndef HAVE_DRM_DEBUG_ENABLED +/* Copied from v5.3-rc1-708-gf0a8f533adc2 include/drm/drm_print.h */ +static inline bool drm_debug_enabled(unsigned int category) +{ + return unlikely(drm_debug & category); +} +#endif /* HAVE_DRM_DEBUG_ENABLED */ + +/* Copied from v4.14-rc3-610-gbf6234a294c5 include/drm/drm_print.h */ +#ifndef drm_printf_indent +#define drm_printf_indent(printer, indent, fmt, ...) \ + drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__) +#endif + +#ifndef HAVE_DRM_PRINT_BITS +void drm_print_bits(struct drm_printer *p, unsigned long value, + const char * const bits[], unsigned int nbits); +#endif + +#ifndef HAVE_DRM_DEBUG_CATEGORY +enum drm_debug_category { + KCL_DRM_DEBUG_CATEGORY +}; +#endif + +#ifndef drm_info +/* + * struct drm_device based logging + * + * Prefer drm_device based logging over device or printk based logging. + */ + +/* Helper for struct drm_device based logging. */ +#define __drm_printk(drm, level, type, fmt, ...) \ + dev_##level##type((drm) ? (drm)->dev : NULL, "[drm] " fmt, ##__VA_ARGS__) + + +#define drm_info(drm, fmt, ...) \ + __drm_printk((drm), info,, fmt, ##__VA_ARGS__) +#endif + +#ifndef drm_info_once +/* copied from include/drm/drm_print.h */ +#define drm_info_once(drm, fmt, ...) \ + __drm_printk((drm), info, _once, fmt, ##__VA_ARGS__) +#endif + +#endif diff --git a/include/kcl/kcl_drm_simple_kms_helper.h b/include/kcl/kcl_drm_simple_kms_helper.h new file mode 100644 index 0000000000000..f6a5ac0c15d00 --- /dev/null +++ b/include/kcl/kcl_drm_simple_kms_helper.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2016 Noralf Trønnes + */ + +#include +#include +#include +#include + +#ifndef HAVE_DRM_SIMPLE_ENCODER_INIT +extern int _kcl_drm_simple_encoder_init(struct drm_device *dev, + struct drm_encoder *encoder, + int encoder_type); +static inline +int drm_simple_encoder_init(struct drm_device *dev, + struct drm_encoder *encoder, + int encoder_type) +{ + return _kcl_drm_simple_encoder_init(dev, encoder, encoder_type); +} +#endif diff --git a/include/kcl/kcl_drm_suballoc.h b/include/kcl/kcl_drm_suballoc.h new file mode 100644 index 0000000000000..46c61883e392f --- /dev/null +++ b/include/kcl/kcl_drm_suballoc.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2011 Red Hat Inc. + * Copyright © 2022 Intel Corporation + */ +#ifndef _KCL_DRM_SUBALLOC_H_ +#define _KCL_DRM_SUBALLOC_H_ + +#include + +#ifndef HAVE_DRM_DRM_SUBALLOC_H + +#include + +#include +#include + +#define DRM_SUBALLOC_MAX_QUEUES 32 +/** + * struct drm_suballoc_manager - fenced range allocations + * @wq: Wait queue for sleeping allocations on contention. + * @hole: Pointer to first hole node. + * @olist: List of allocated ranges. + * @flist: Array[fence context hash] of queues of fenced allocated ranges. + * @size: Size of the managed range. + * @align: Default alignment for the managed range. + */ +struct drm_suballoc_manager { + wait_queue_head_t wq; + struct list_head *hole; + struct list_head olist; + struct list_head flist[DRM_SUBALLOC_MAX_QUEUES]; + size_t size; + size_t align; +}; + +/** + * struct drm_suballoc - Sub-allocated range + * @olist: List link for list of allocated ranges. + * @flist: List link for the manager fenced allocated ranges queues. + * @manager: The drm_suballoc_manager. + * @soffset: Start offset. + * @eoffset: End offset + 1 so that @eoffset - @soffset = size. + * @fence: The fence protecting the allocation.
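To make the lifecycle concrete, a rough usage sketch of the allocator API declared below; the arena size, alignment and error handling are illustrative:

static void example_suballoc_roundtrip(struct dma_fence *fence)
{
	struct drm_suballoc_manager mgr;
	struct drm_suballoc *sa;

	/* One 256 KiB arena with a 256-byte default alignment. */
	drm_suballoc_manager_init(&mgr, 256 * 1024, 256);

	/* Interruptible allocation of 4 KiB at the default alignment. */
	sa = drm_suballoc_new(&mgr, 4096, GFP_KERNEL, true, 0);
	if (!IS_ERR(sa))
		/* The range becomes reusable once @fence signals. */
		drm_suballoc_free(sa, fence);

	drm_suballoc_manager_fini(&mgr);
}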
+ */ +struct drm_suballoc { + struct list_head olist; + struct list_head flist; + struct drm_suballoc_manager *manager; + size_t soffset; + size_t eoffset; + struct dma_fence *fence; +}; + +void drm_suballoc_manager_init(struct drm_suballoc_manager *sa_manager, + size_t size, size_t align); + +void drm_suballoc_manager_fini(struct drm_suballoc_manager *sa_manager); + +struct drm_suballoc * +drm_suballoc_new(struct drm_suballoc_manager *sa_manager, size_t size, + gfp_t gfp, bool intr, size_t align); + +void drm_suballoc_free(struct drm_suballoc *sa, struct dma_fence *fence); + +/** + * drm_suballoc_soffset - Range start. + * @sa: The struct drm_suballoc. + * + * Return: The start of the allocated range. + */ +static inline size_t drm_suballoc_soffset(struct drm_suballoc *sa) +{ + return sa->soffset; +} + +/** + * drm_suballoc_eoffset - Range end. + * @sa: The struct drm_suballoc. + * + * Return: The end of the allocated range + 1. + */ +static inline size_t drm_suballoc_eoffset(struct drm_suballoc *sa) +{ + return sa->eoffset; +} + +/** + * drm_suballoc_size - Range size. + * @sa: The struct drm_suballoc. + * + * Return: The size of the allocated range. + */ +static inline size_t drm_suballoc_size(struct drm_suballoc *sa) +{ + return sa->eoffset - sa->soffset; +} + +#ifdef CONFIG_DEBUG_FS +void drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager, + struct drm_printer *p, + unsigned long long suballoc_base); +#else +static inline void +drm_suballoc_dump_debug_info(struct drm_suballoc_manager *sa_manager, + struct drm_printer *p, + unsigned long long suballoc_base) +{ } + +#endif +#endif /*HAVE_DRM_DRM_SUBALLOC_H*/ + +#endif /* _KCL_DRM_SUBALLOC_H_ */ diff --git a/include/kcl/kcl_drm_vblank.h b/include/kcl/kcl_drm_vblank.h new file mode 100644 index 0000000000000..4a74049654c92 --- /dev/null +++ b/include/kcl/kcl_drm_vblank.h @@ -0,0 +1,36 @@ +/* + * Copyright 2016 Intel Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
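The header that opens next backfills drm_crtc_vblank_crtc(). A hedged sketch of what a caller gains from it, assuming the upstream struct drm_vblank_crtc layout with its atomic64_t count member:

static u64 example_vblank_seq(struct drm_crtc *crtc)
{
	struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);

	/* Read the CRTC's current vblank sequence without open-coding
	 * the dev->vblank[drm_crtc_index(crtc)] lookup. */
	return atomic64_read(&vblank->count);
}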
+ */ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_DRM_VBLANK_H +#define _KCL_KCL_DRM_VBLANK_H + +#include +#include +#include + +/*copy from include/drm/drm_vblank.h */ +#ifndef HAVE_CRTC_DRM_VBLANK_CRTC +struct drm_vblank_crtc *drm_crtc_vblank_crtc(struct drm_crtc *crtc); +#endif + +#endif /*_KCL_KCL_DRM_VBLANK_H */ diff --git a/include/kcl/kcl_drm_writeback.h b/include/kcl/kcl_drm_writeback.h new file mode 100644 index 0000000000000..14b6d63f4b4b3 --- /dev/null +++ b/include/kcl/kcl_drm_writeback.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * Author: Brian Starkey + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ +#ifndef AMDKCL_DRM_WRITEBACK_H +#define AMDKCL_DRM_WRITEBACK_H + +#include + +#ifndef HAVE_DRM_WRITEBACK_CONNECTOR_INIT_7_ARGS +static inline int _kcl_drm_writeback_connector_init(struct drm_device *dev, + struct drm_writeback_connector *wb_connector, + const struct drm_connector_funcs *con_funcs, + const struct drm_encoder_helper_funcs *enc_helper_funcs, + const u32 *formats, int n_formats, + u32 possible_crtcs) +{ + wb_connector->encoder.possible_crtcs = possible_crtcs; + + return drm_writeback_connector_init(dev, wb_connector, con_funcs, enc_helper_funcs, formats, n_formats); +} +#define drm_writeback_connector_init _kcl_drm_writeback_connector_init +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_dynamic_debug.h b/include/kcl/kcl_dynamic_debug.h new file mode 100644 index 0000000000000..0d5ad3a9d2d52 --- /dev/null +++ b/include/kcl/kcl_dynamic_debug.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef AMDKCL_DYNAMIC_DEBUG_H +#define AMDKCL_DYNAMIC_DEBUG_H + +#include + +#ifndef DECLARE_DYNDBG_CLASSMAP +enum class_map_type { + DD_CLASS_TYPE_DISJOINT_BITS, + /** + * DD_CLASS_TYPE_DISJOINT_BITS: classes are independent, one per bit. + * expecting hex input. Built for drm.debug, basis for other types. + */ + DD_CLASS_TYPE_LEVEL_NUM, + /** + * DD_CLASS_TYPE_LEVEL_NUM: input is numeric level, 0-N. + * N turns on just bits N-1 .. 0, so N=0 turns all bits off. + */ + DD_CLASS_TYPE_DISJOINT_NAMES, + /** + * DD_CLASS_TYPE_DISJOINT_NAMES: input is a CSV of [+-]CLASS_NAMES, + * classes are independent, like _DISJOINT_BITS. + */ + DD_CLASS_TYPE_LEVEL_NAMES, + /** + * DD_CLASS_TYPE_LEVEL_NAMES: input is a CSV of [+-]CLASS_NAMES, + * intended for names like: INFO,DEBUG,TRACE, with a module prefix + * avoid EMERG,ALERT,CRIT,ERR,WARNING: they're not debug + */ +}; + +struct ddebug_class_map { + struct list_head link; + struct module *mod; + const char *mod_name; /* needed for builtins */ + const char **class_names; + const int length; + const int base; /* index of 1st .class_id, allows split/shared space */ + enum class_map_type map_type; +}; + +/** + * DECLARE_DYNDBG_CLASSMAP - declare classnames known by a module + * @_var: a struct ddebug_class_map, passed to module_param_cb + * @_type: enum class_map_type, chooses bits/verbose, numeric/symbolic + * @_base: offset of 1st class-name. splits .class_id space + * @classes: class-names used to control class'd prdbgs + */ +#define DECLARE_DYNDBG_CLASSMAP(_var, _maptype, _base, ...) 
\ + static const char *_var##_classnames[] = { __VA_ARGS__ }; \ + static struct ddebug_class_map __aligned(8) __used \ + __section("__dyndbg_classes") _var = { \ + .mod = THIS_MODULE, \ + .mod_name = KBUILD_MODNAME, \ + .base = _base, \ + .map_type = _maptype, \ + .length = NUM_TYPE_ARGS(char*, __VA_ARGS__), \ + .class_names = _var##_classnames, \ + } +#define NUM_TYPE_ARGS(eltype, ...) \ + (sizeof((eltype[]){__VA_ARGS__}) / sizeof(eltype)) + +#endif + +#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG) +#ifndef _dynamic_func_call_no_desc +#define __dynamic_func_call_no_desc(id, fmt, func, ...) do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(id)) \ + func(__VA_ARGS__); \ +} while (0) + +#define _dynamic_func_call_no_desc(fmt, func, ...) \ + __dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) +#endif /* _dynamic_func_call_no_desc */ +#endif /* CONFIG_DYNAMIC_DEBUG */ +#endif /* AMDKCL_DYNAMIC_DEBUG_H */ diff --git a/include/kcl/kcl_eventpoll.h b/include/kcl/kcl_eventpoll.h new file mode 100644 index 0000000000000..5f23d49a7e46f --- /dev/null +++ b/include/kcl/kcl_eventpoll.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * include/linux/eventpoll.h ( Efficient event polling implementation ) + * Copyright (C) 2001,...,2006 Davide Libenzi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Davide Libenzi + * + */ +#ifndef AMDKCL_EVENTPOLL_H +#define AMDKCL_EVENTPOLL_H + +#include + +/* Copied from include/uapi/linux/eventpoll.h */ +#ifndef EPOLLIN +#define EPOLLIN 0x00000001 +#define EPOLLPRI 0x00000002 +#define EPOLLOUT 0x00000004 +#define EPOLLERR 0x00000008 +#define EPOLLHUP 0x00000010 +#define EPOLLRDNORM 0x00000040 +#define EPOLLRDBAND 0x00000080 +#define EPOLLWRNORM 0x00000100 +#define EPOLLWRBAND 0x00000200 +#define EPOLLMSG 0x00000400 +#define EPOLLRDHUP 0x00002000 +#endif +#endif diff --git a/include/kcl/kcl_fdtable.h b/include/kcl/kcl_fdtable.h new file mode 100644 index 0000000000000..f6829418719c5 --- /dev/null +++ b/include/kcl/kcl_fdtable.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_FDTABLE_H +#define _KCL_FDTABLE_H + +#ifndef HAVE_KERNEL_CLOSE_FD +#include +#ifdef HAVE_KSYS_CLOSE_FD +#define close_fd ksys_close +#else +#define close_fd sys_close +#endif +#endif + +#endif diff --git a/include/kcl/kcl_fence.h b/include/kcl/kcl_fence.h new file mode 100644 index 0000000000000..a4a94e8a03e54 --- /dev/null +++ b/include/kcl/kcl_fence.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Fence mechanism for dma-buf to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + */ +#ifndef AMDKCL_FENCE_H +#define AMDKCL_FENCE_H + +#include +#include +#include + +#if !defined(HAVE__DMA_FENCE_IS_LATER_2ARGS) + +#if !defined(HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO) +static inline bool __dma_fence_is_later(u64 f1, u64 f2) +{ + + /* This is for backward compatibility with drivers which can only handle + * 32bit sequence numbers. Use a 64bit compare when any of the higher + * bits are none zero, otherwise use a 32bit compare with wrap around + * handling. 
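The wrap-around handling described above is easiest to see with concrete numbers; a worked sketch (not code from this patch):

/* Two 32-bit seqnos straddling the wrap point: */
u32 f1 = 0x00000002;	/* issued just after the wrap */
u32 f2 = 0xfffffffe;	/* issued just before the wrap */

/* A plain compare orders them wrongly: f1 > f2 is false. The signed
 * difference gets it right: f1 - f2 == 0x00000004, and (int)4 > 0. */
bool f1_is_later = (int)(f1 - f2) > 0;	/* true */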
+ */ + if (upper_32_bits(f1) || upper_32_bits(f2)) + return f1 > f2; + + return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0; +} + +#elif !defined(HAVE__DMA_FENCE_IS_LATER_WITH_OPS_ARG) && \ + defined(HAVE_DMA_FENCE_OPS_USE_64BIT_SEQNO) +static inline bool __dma_fence_is_later(u64 f1, u64 f2, + const struct dma_fence_ops *ops) +{ + /* This is for backward compatibility with drivers which can only handle + * 32bit sequence numbers. Use a 64bit compare when the driver says to + * do so. + */ + if (ops->use_64bit_seqno) + return f1 > f2; + + return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0; +} + +#endif +#endif /* HAVE__DMA_FENCE_IS_LATER_2ARGS */ + +/* + * commit v4.18-rc2-533-g418cc6ca0607 + * dma-fence: Allow wait_any_timeout for all fences) + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 19, 0) +#define AMDKCL_FENCE_WAIT_ANY_TIMEOUT +signed long +_kcl_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, + bool intr, signed long timeout, uint32_t *idx); +#endif + +/* + * commit v4.9-rc2-472-gbcc004b629d2 + * dma-buf/fence: make timeout handling in fence_default_wait consistent (v2)) + * + * commit v4.9-rc2-473-g698c0f7ff216 + * dma-buf/fence: revert "don't wait when specified timeout is zero" (v2) + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 10, 0) +#define AMDKCL_FENCE_DEFAULT_WAIT_TIMEOUT +signed long +_kcl_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout); +extern signed long _kcl_fence_wait_timeout(struct fence *fence, bool intr, + signed long timeout); +#endif + +/* + * commit v4.14-rc3-601-g5f72db59160c + * dma-buf/fence: Sparse wants __rcu on the object itself + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 15, 0) +#define AMDKCL_FENCE_GET_RCU_SAFE +static inline struct dma_fence * +_kcl_fence_get_rcu_safe(struct dma_fence __rcu **fencep) +{ + do { + struct dma_fence *fence; + + fence = rcu_dereference(*fencep); + if (!fence) + return NULL; + + if (!dma_fence_get_rcu(fence)) + continue; + + /* The atomic_inc_not_zero() inside dma_fence_get_rcu() + * provides a full memory barrier upon success (such as now). + * This is paired with the write barrier from assigning + * to the __rcu protected fence pointer so that if that + * pointer still matches the current fence, we know we + * have successfully acquire a reference to it. If it no + * longer matches, we are holding a reference to some other + * reallocated pointer. This is possible if the allocator + * is using a freelist like SLAB_TYPESAFE_BY_RCU where the + * fence remains valid for the RCU grace period, but it + * may be reallocated. When using such allocators, we are + * responsible for ensuring the reference we get is to + * the right fence, as below. 
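For context, a typical caller of the upstream dma_fence_get_rcu_safe(), which this _kcl_ variant mirrors, looks roughly like the following (obj->fence stands in for any __rcu-annotated fence pointer):

struct dma_fence *fence;

rcu_read_lock();
fence = dma_fence_get_rcu_safe(&obj->fence);
rcu_read_unlock();

/* fence is now NULL or a fully referenced, still-current fence. */
if (fence) {
	/* ... use the fence ... */
	dma_fence_put(fence);
}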
+ */ + if (fence == rcu_access_pointer(*fencep)) + return rcu_pointer_handoff(fence); + + dma_fence_put(fence); + } while (1); +} +#endif + +/* + * commit v4.18-rc2-519-gc701317a3eb8 + * dma-fence: Make ->enable_signaling optional + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 19, 0) +#define AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING +bool _kcl_fence_enable_signaling(struct dma_fence *f); +#define AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING_OPTIONAL \ + .enable_signaling = _kcl_fence_enable_signaling, +#else +#define AMDKCL_DMA_FENCE_OPS_ENABLE_SIGNALING_OPTIONAL +#endif + +/* + * commit v4.18-rc2-533-g418cc6ca0607 + * dma-fence: Make ->wait callback optional + */ +#if DRM_VERSION_CODE < DRM_VERSION(4, 19, 0) +#define AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL \ + .wait = dma_fence_default_wait, +#else +#define AMDKCL_DMA_FENCE_OPS_WAIT_OPTIONAL +#endif + +#if !defined(HAVE_DMA_FENCE_DESCRIBE) +void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq); +#endif + +#endif /* AMDKCL_FENCE_H */ diff --git a/include/kcl/kcl_fs.h b/include/kcl/kcl_fs.h new file mode 100644 index 0000000000000..633a6edfd8f17 --- /dev/null +++ b/include/kcl/kcl_fs.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_FS_H +#define AMDKCL_FS_H + +#include +#include + +/* Copied from v5.4-rc2-1-g2952db0fd51b linux/fs.h */ +#ifndef HAVE_COMPAT_PTR_IOCTL +#ifdef CONFIG_COMPAT +extern long _kcl_compat_ptr_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +static inline long compat_ptr_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return _kcl_compat_ptr_ioctl(file, cmd, arg); +} +#else +#define compat_ptr_ioctl NULL +#endif /* CONFIG_COMPAT */ +#endif /* HAVE_COMPAT_PTR_IOCTL */ + +#ifndef HAVE_KERNEL_WRITE_PPOS +ssize_t _kcl_kernel_write(struct file *file, const void *buf, size_t count, + loff_t *pos); +#endif + +#endif diff --git a/include/kcl/kcl_highmem-internal.h b/include/kcl/kcl_highmem-internal.h new file mode 100644 index 0000000000000..7304f188d7c2d --- /dev/null +++ b/include/kcl/kcl_highmem-internal.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef KCL_KCL_HIGHMEM_INTERNAL_H +#define KCL_KCL_HIGHMEM_INTERNAL_H + +#include +#include + +#ifndef HAVE_KMAP_LOCAL + +static inline void *kmap_local_page(struct page *page) +{ + return page_address(page); +} + +static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + return kmap_local_page(page); +} + +#endif + +#ifndef kunmap_local + +static inline void __kunmap_local(void *addr) +{ +#ifdef ARCH_HAS_FLUSH_ON_KUNMAP + kunmap_flush_on_unmap(addr); +#endif +} + +#define kunmap_local(__addr) \ +do { \ + BUILD_BUG_ON(__same_type((__addr), struct page *)); \ + __kunmap_local(__addr); \ +} while (0) +#endif /* kunmap_local */ + + + +#endif diff --git a/include/kcl/kcl_hypervisor.h b/include/kcl/kcl_hypervisor.h new file mode 100644 index 0000000000000..60521c70b9ba5 --- /dev/null +++ b/include/kcl/kcl_hypervisor.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_HYPERVISOR_H +#define AMDKCL_HYPERVISOR_H + +#include + +#ifdef CONFIG_X86 +#if !defined(HAVE_X86_HYPERVISOR_TYPE) +enum x86_hypervisor_type { + X86_HYPER_NATIVE = 0, + X86_HYPER_VMWARE, + X86_HYPER_MS_HYPERV, + X86_HYPER_XEN_PV, + X86_HYPER_XEN_HVM, + X86_HYPER_KVM, + X86_HYPER_JAILHOUSE, + X86_HYPER_ACRN, +}; +#endif + +#ifndef HAVE_HYPERVISOR_IS_TYPE +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return false; +} +#endif + +#endif /* CONFIG_X86 */ +#endif /* 
AMDKCL_HYPERVISOR_H */ diff --git a/include/kcl/kcl_i2c.h b/include/kcl/kcl_i2c.h new file mode 100644 index 0000000000000..2e7f36acdeadc --- /dev/null +++ b/include/kcl/kcl_i2c.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * i2c.h - definitions for the Linux i2c bus interface + * Copyright (C) 1995-2000 Simon G. Vogl + * Copyright (C) 2013-2019 Wolfram Sang + * + * With some changes from Kyösti Mälkki and + * Frodo Looijaard + */ +#ifndef _KCL_KCL_I2C_H +#define _KCL_KCL_I2C_H + +#include + +#ifdef HAVE_I2C_NEW_CLIENT_DEVICE +extern struct i2c_client * +i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info); +#else +static inline struct i2c_client * +i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info) +{ + return i2c_new_device(adap, info); +} +#endif + +#ifndef I2C_AQ_NO_ZERO_LEN +#define I2C_AQ_NO_ZERO_LEN_READ BIT(5) +#define I2C_AQ_NO_ZERO_LEN_WRITE BIT(6) +#define I2C_AQ_NO_ZERO_LEN (I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE) +#endif + + +#endif diff --git a/include/kcl/kcl_idr.h b/include/kcl/kcl_idr.h new file mode 100644 index 0000000000000..1cdea5ec45d67 --- /dev/null +++ b/include/kcl/kcl_idr.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * include/linux/idr.h + * + * 2002-10-18 written by Jim Houston jim.houston@ccur.com + * Copyright (C) 2002 by Concurrent Computer Corporation + * + * Small id to pointer translation service avoiding fixed sized + * tables. + */ +#ifndef AMDKCL_IDR_H +#define AMDKCL_IDR_H + +#include + +/* Copied from v4.4-rc2-61-ga55bbd375d18 include/linux/idr.h */ +#ifndef idr_for_each_entry_continue +#define idr_for_each_entry_continue(idr, entry, id) \ + for ((entry) = idr_get_next((idr), &(id)); \ + entry; \ + ++id, (entry) = idr_get_next((idr), &(id))) +#endif + +#ifndef HAVE_IDR_REMOVE_RETURN_VOID_POINTER +static inline void *_kcl_idr_remove(struct idr *idr, int id) +{ + void *ptr; + + ptr = idr_find(idr, id); + if (ptr) + idr_remove(idr, id); + + return ptr; +} +#define idr_remove _kcl_idr_remove +#endif /* HAVE_IDR_REMOVE_RETURN_VOID_POINTER */ + +#ifndef HAVE_IDR_INIT_BASE +#ifdef HAVE_STRUCT_IDE_IDR_BASE +static inline void kc_idr_init_base(struct idr *idr, int base) +{ + INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); + idr->idr_base = base; + idr->idr_next = 0; +} +#else +static inline void kc_idr_init_base(struct idr *idr, int base) +{ + idr_init(idr); +} +#endif +#define idr_init_base kc_idr_init_base +#endif +#endif /* AMDKCL_IDR_H */ diff --git a/include/kcl/kcl_intel_family.h b/include/kcl/kcl_intel_family.h new file mode 100644 index 0000000000000..90793a772861b --- /dev/null +++ b/include/kcl/kcl_intel_family.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_INTEL_FAMILY_H +#define AMDKCL_INTEL_FAMILY_H + +#ifdef CONFIG_X86 + +#include +/* Copied froma asm/intel-family.h*/ +#ifndef INTEL_FAM6_ROCKETLAKE +#define INTEL_FAM6_ROCKETLAKE 0xA7 +#endif + +#ifndef INTEL_FAM6_ALDERLAKE +#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#endif + +#endif /* CONFIG_X86 */ +#endif diff --git a/include/kcl/kcl_io-mapping.h b/include/kcl/kcl_io-mapping.h new file mode 100644 index 0000000000000..6551e94cf3551 --- /dev/null +++ b/include/kcl/kcl_io-mapping.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2008 Keith Packard + */ + +#ifndef KCL_KCL_IO_MAPPING_H +#define KCL_KCL_IO_MAPPING_H + +#include + +#ifndef HAVE_IO_MAPPING_UNMAP_LOCAL +static inline void 
io_mapping_unmap_local(void __iomem *vaddr) +{ + io_mapping_unmap(vaddr); +} +#endif + +#ifndef HAVE_IO_MAPPING_MAP_LOCAL_WC +static inline void __iomem * +io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset) +{ + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); +} +#endif + +#endif /* KCL_KCL_IO_MAPPING_H */ diff --git a/include/kcl/kcl_iosys-map.h b/include/kcl/kcl_iosys-map.h new file mode 100644 index 0000000000000..d35ce3a8f5c58 --- /dev/null +++ b/include/kcl/kcl_iosys-map.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Pointer abstraction for IO/system memory + * Copied from include/linux/iosys-map.h + */ + +#ifndef _KCL_KCL__IOSYS_MAP_H__H__ +#define _KCL_KCL__IOSYS_MAP_H__H__ + +#include + +#ifndef HAVE_LINUX_IOSYS_MAP_H +#include +#include + +/** + * struct iosys_map - Pointer to IO/system memory + * @vaddr_iomem: The buffer's address if in I/O memory + * @vaddr: The buffer's address if in system memory + * @is_iomem: True if the buffer is located in I/O memory, or false + * otherwise. + */ +struct iosys_map { + union { + void __iomem *vaddr_iomem; + void *vaddr; + }; + bool is_iomem; +}; + +/** + * IOSYS_MAP_INIT_VADDR - Initializes struct iosys_map to an address in system memory + * @vaddr_: A system-memory address + */ +#define IOSYS_MAP_INIT_VADDR(vaddr_) \ + { \ + .vaddr = (vaddr_), \ + .is_iomem = false, \ + } + + +/** + * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory + * @map: The iosys_map structure + * @vaddr: A system-memory address + * + * Sets the address and clears the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr(struct iosys_map *map, void *vaddr) +{ + map->vaddr = vaddr; + map->is_iomem = false; +} + +/** + * iosys_map_set_vaddr_iomem - Sets a iosys mapping structure to an address in I/O memory + * @map: The iosys_map structure + * @vaddr_iomem: An I/O-memory address + * + * Sets the address and the I/O-memory flag. + */ +static inline void iosys_map_set_vaddr_iomem(struct iosys_map *map, + void __iomem *vaddr_iomem) +{ + map->vaddr_iomem = vaddr_iomem; + map->is_iomem = true; +} + +/** + * iosys_map_is_equal - Compares two iosys mapping structures for equality + * @lhs: The iosys_map structure + * @rhs: A iosys_map structure to compare with + * + * Two iosys mapping structures are equal if they both refer to the same type of memory + * and to the same address within that memory. + * + * Returns: + * True if both structures are equal, or false otherwise. + */ +static inline bool iosys_map_is_equal(const struct iosys_map *lhs, + const struct iosys_map *rhs) +{ + if (lhs->is_iomem != rhs->is_iomem) + return false; + else if (lhs->is_iomem) + return lhs->vaddr_iomem == rhs->vaddr_iomem; + else + return lhs->vaddr == rhs->vaddr; +} + +/** + * iosys_map_is_null - Tests for a iosys mapping to be NULL + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping is NULL. + * + * Returns: + * True if the mapping is NULL, or false otherwise. + */ +static inline bool iosys_map_is_null(const struct iosys_map *map) +{ + if (map->is_iomem) + return !map->vaddr_iomem; + return !map->vaddr; +} + +/** + * iosys_map_is_set - Tests if the iosys mapping has been set + * @map: The iosys_map structure + * + * Depending on the state of struct iosys_map.is_iomem, tests if the + * mapping has been set. + * + * Returns: + * True if the mapping has been set, or false otherwise.
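Putting the accessors together, a sketch of the intended usage; the names are illustrative, and iosys_map_memcpy_to() plus iosys_map_incr() follow just below:

static void example_write_payload(void *cpu_ptr, void __iomem *io_ptr,
				  bool use_io, const void *payload, size_t len)
{
	struct iosys_map map = IOSYS_MAP_INIT_VADDR(cpu_ptr);

	if (use_io)
		iosys_map_set_vaddr_iomem(&map, io_ptr);

	/* Dispatches to memcpy() or memcpy_toio() based on map.is_iomem. */
	iosys_map_memcpy_to(&map, 0, payload, len);
	iosys_map_incr(&map, len);	/* advance past what was written */
}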
+ */ +static inline bool iosys_map_is_set(const struct iosys_map *map) +{ + return !iosys_map_is_null(map); +} + +/** + * iosys_map_clear - Clears a iosys mapping structure + * @map: The iosys_map structure + * + * Clears all fields to zero, including struct iosys_map.is_iomem, so + * mapping structures that were set to point to I/O memory are reset for + * system memory. Pointers are cleared to NULL. This is the default. + */ +static inline void iosys_map_clear(struct iosys_map *map) +{ + if (map->is_iomem) { + map->vaddr_iomem = NULL; + map->is_iomem = false; + } else { + map->vaddr = NULL; + } +} + +/** + * iosys_map_memcpy_to - Memcpy into offset of iosys_map + * @dst: The iosys_map structure + * @dst_offset: The offset from which to copy + * @src: The source buffer + * @len: The number of byte in src + * + * Copies data into a iosys_map with an offset. The source buffer is in + * system memory. Depending on the buffer's location, the helper picks the + * correct method of accessing the memory. + */ +static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset, + const void *src, size_t len) +{ + if (dst->is_iomem) + memcpy_toio(dst->vaddr_iomem + dst_offset, src, len); + else + memcpy(dst->vaddr + dst_offset, src, len); +} + +/** + * iosys_map_incr - Increments the address stored in a iosys mapping + * @map: The iosys_map structure + * @incr: The number of bytes to increment + * + * Increments the address stored in a iosys mapping. Depending on the + * buffer's location, the correct value will be updated. + */ +static inline void iosys_map_incr(struct iosys_map *map, size_t incr) +{ + if (map->is_iomem) + map->vaddr_iomem += incr; + else + map->vaddr += incr; +} + +#endif /* HAVE_LINUX_IOSYS_MAP_H */ + +#endif diff --git a/include/kcl/kcl_irqdesc.h b/include/kcl/kcl_irqdesc.h new file mode 100644 index 0000000000000..1e439ea146d7c --- /dev/null +++ b/include/kcl/kcl_irqdesc.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef AMDKCL_IRQDESC_H +#define AMDKCL_IRQDESC_H + +#ifndef HAVE_GENERIC_HANDLE_DOMAIN_IRQ +int kcl_generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); +#define generic_handle_domain_irq kcl_generic_handle_domain_irq +#endif /* HAVE_GENERIC_HANDLE_DOMAIN_IRQ */ + +#endif diff --git a/include/kcl/kcl_kernel.h b/include/kcl/kcl_kernel.h new file mode 100644 index 0000000000000..d055fad138c19 --- /dev/null +++ b/include/kcl/kcl_kernel.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_KERNEL_H +#define AMDKCL_KERNEL_H + +#include +#include + +/* Copied from include/linux/kernel.h */ +#ifndef u64_to_user_ptr +#define u64_to_user_ptr(x) ( \ +{ \ + typecheck(u64, x); \ + (void __user *)(uintptr_t)x; \ +} \ +) +#endif + +#ifndef __GFP_RETRY_MAYFAIL +/* Copied from include/linux/gfp.h and modified for KCL */ +#define __GFP_RETRY_MAYFAIL __GFP_NORETRY +#endif + +#ifndef ALIGN_DOWN +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#endif /* ALIGN_DOWN */ + +#ifndef ___GFP_KSWAPD_RECLAIM +#define ___GFP_KSWAPD_RECLAIM 0x00u +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#endif /* ___GFP_KSWAPD_RECLAIM */ + +#endif /* AMDKCL_KERNEL_H */ diff --git a/include/kcl/kcl_kref.h b/include/kcl/kcl_kref.h new file mode 100644 index 0000000000000..491ce5398137b --- /dev/null +++ b/include/kcl/kcl_kref.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * kref.h - library routines for handling generic reference counted objects + * 
+ * Copyright (C) 2004 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Corp. + * + * based on kobject.h which was: + * Copyright (C) 2002-2003 Patrick Mochel + * Copyright (C) 2002-2003 Open Source Development Labs + */ +#ifndef AMDKCL_KREF_H +#define AMDKCL_KREF_H + +#include + +/* Copied from include/linux/kref.h */ +#endif /* AMDKCL_KREF_H */ diff --git a/include/kcl/kcl_kthread.h b/include/kcl/kcl_kthread.h new file mode 100644 index 0000000000000..a4e7fdf6bb12f --- /dev/null +++ b/include/kcl/kcl_kthread.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_KTHREAD_H +#define AMDKCL_KTHREAD_H + +#include +#include +#include + +#if !defined(HAVE___KTHREAD_SHOULD_PATK) +extern bool __kcl_kthread_should_park(struct task_struct *k); +#endif + +/* Copied from v5.7-13665-g9bf5b9eb232b kernel/kthread.c */ +#ifndef HAVE_KTHREAD_USE_MM +static inline +void kthread_use_mm(struct mm_struct *mm) +{ + use_mm(mm); +} +static inline +void kthread_unuse_mm(struct mm_struct *mm) +{ + unuse_mm(mm); +} +#endif + +#endif /* AMDKCL_KTHREAD_H */ diff --git a/include/kcl/kcl_list.h b/include/kcl/kcl_list.h new file mode 100644 index 0000000000000..20e2bee6bef61 --- /dev/null +++ b/include/kcl/kcl_list.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_LIST_H +#define AMDKCL_LIST_H + +#include + +/* Copied from include/linux/list.h */ +#if !defined(HAVE_LIST_ROTATE_TO_FRONT) +static inline void list_rotate_to_front(struct list_head *list, + struct list_head *head) +{ + list_move_tail(head, list); +} +#endif + +#if !defined(HAVE_LIST_IS_FIRST) +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return list->prev == head; +} +#endif + +#endif /*AMDKCL_LIST_H*/ diff --git a/include/kcl/kcl_local64.h b/include/kcl/kcl_local64.h new file mode 100644 index 0000000000000..0b374fef81d85 --- /dev/null +++ b/include/kcl/kcl_local64.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_ASM_GENERIC_LOCAL64_H +#define AMDKCL_ASM_GENERIC_LOCAL64_H + +#include +#include +#include + +/* + * A signed long type for operations which are atomic for a single CPU. + * Usually used in combination with per-cpu variables. + * + * This is the default implementation, which uses atomic64_t. Which is + * rather pointless. The whole point behind local64_t is that some processors + * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs + * running on this CPU. local64_t allows exploitation of such capabilities. + */ + +/* Implement in terms of atomics. 
*/ + +#if !defined HAVE_LINUX_LOCAL_TRY_CMPXCHG && defined HAVE_LINUX_ATOMIC_LONG_TRY_CMPXCHG +#define local_try_cmpxchg(l, po, n) atomic_long_try_cmpxchg((&(l)->a), (po), (n)) +#if BITS_PER_LONG == 64 + +static inline bool local64_try_cmpxchg(local64_t *l, s64 *old, s64 new) +{ + return local_try_cmpxchg(&l->a, (long *)old, new); +} +#else +#define local64_try_cmpxchg(l, po, n) atomic64_try_cmpxchg((&(l)->a), (po), (n)) +#endif +#endif +#endif diff --git a/include/kcl/kcl_math64.h b/include/kcl/kcl_math64.h new file mode 100644 index 0000000000000..f1d04dee6b8c5 --- /dev/null +++ b/include/kcl/kcl_math64.h @@ -0,0 +1,11 @@ +#ifndef AMDKCL_MATH64_H +#define AMDKCL_MATH64_H + +#include + +#ifndef DIV64_U64_ROUND_UP +#define DIV64_U64_ROUND_UP(ll, d) \ + ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_mce.h b/include/kcl/kcl_mce.h new file mode 100644 index 0000000000000..223c2bd03bb87 --- /dev/null +++ b/include/kcl/kcl_mce.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_MCE_H +#define AMDKCL_MCE_H + +#ifdef CONFIG_X86_MCE_AMD + +#include + +/* Copied from asm/mce.h */ +#ifndef XEC +#define XEC(x, mask) (((x) >> 16) & mask) +#endif + +#ifndef HAVE_MCE_PRIO_UC +#define MCE_PRIO_UC MCE_PRIO_SRAO +#endif + +#if !defined(HAVE_SMCA_GET_BANK_TYPE_WITH_TWO_ARGUMENTS) +#if defined(HAVE_SMCA_GET_BANK_TYPE_WITH_ONE_ARGUMENT) || defined(HAVE_STRUCT_SMCA_BANK) +#if defined(HAVE_STRUCT_SMCA_BANK) +enum smca_bank_types smca_get_bank_type(unsigned int bank); +#endif +static inline +enum smca_bank_types _kcl_smca_get_bank_type(unsigned int cpu, unsigned int bank) +{ + return smca_get_bank_type(bank); +} +#else +int smca_get_bank_type(unsigned int bank); +static inline +int _kcl_smca_get_bank_type(unsigned int cpu, unsigned int bank) +{ + return smca_get_bank_type(bank); +} +#endif +#endif + +#endif /* CONFIG_X86_MCE_AMD */ +#endif diff --git a/include/kcl/kcl_memory.h b/include/kcl/kcl_memory.h new file mode 100644 index 0000000000000..e0dac3be04b47 --- /dev/null +++ b/include/kcl/kcl_memory.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_MEMORY_H +#define _KCL_KCL_MEMORY_H + +#ifndef HAVE_VMF_INSERT +static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, + unsigned long addr, + pfn_t pfn) +{ + int err; +#if !defined(HAVE_PFN_T_VM_INSERT_MIXED) + err = vm_insert_mixed(vma, addr, pfn_t_to_pfn(pfn)); +#else + err = vm_insert_mixed(vma, addr, pfn); +#endif + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) +{ + int err = vm_insert_pfn(vma, addr, pfn); + + if (err == -ENOMEM) + return VM_FAULT_OOM; + if (err < 0 && err != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +#endif /* HAVE_VMF_INSERT */ + +#ifndef HAVE_VMF_INSERT_MIXED_PROT +vm_fault_t _kcl_vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn, pgprot_t pgprot); +static inline +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn, pgprot_t pgprot) +{ + return _kcl_vmf_insert_mixed_prot(vma, addr, pfn, pgprot); +} +#endif /* HAVE_VMF_INSERT_MIXED_PROT */ + +#ifndef HAVE_VMF_INSERT_PFN_PROT +vm_fault_t _kcl_vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); +static 
inline +vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ + return _kcl_vmf_insert_pfn_prot(vma, addr, pfn, pgprot); +} +#endif /* HAVE_VMF_INSERT_PFN_PROT */ + +#endif diff --git a/include/kcl/kcl_mm.h b/include/kcl/kcl_mm.h new file mode 100644 index 0000000000000..b502b239610ef --- /dev/null +++ b/include/kcl/kcl_mm.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/ipc/util.c + * Copyright (C) 1992 Krishna Balasubramanian + * For kvmalloc/kvzalloc + */ +#ifndef AMDKCL_MM_H +#define AMDKCL_MM_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef untagged_addr +/* Copied from include/linux/mm.h */ +#define untagged_addr(addr) (addr) +#endif + +#ifndef HAVE_MMPUT_ASYNC +extern void (*_kcl_mmput_async)(struct mm_struct *mm); +#endif + +#ifndef HAVE_ZONE_DEVICE_PAGE_INIT +void zone_device_page_init(struct page *page); +#endif + +#ifndef HAVE_FAULT_FLAG_ALLOW_RETRY_FIRST +static inline bool fault_flag_allow_retry_first(unsigned int flags) +{ + return (flags & FAULT_FLAG_ALLOW_RETRY) && + (!(flags & FAULT_FLAG_TRIED)); +} +#endif + +#if !defined(HAVE_MEMALLOC_NOFS_SAVE) +static inline unsigned int memalloc_nofs_save(void) +{ + return current->flags; +} + +static inline void memalloc_nofs_restore(unsigned int flags) +{ +} +#endif + +#if !defined(HAVE_ZONE_MANAGED_PAGES) +/* Copied from v4.20-6505-g9705bea5f833 include/linux/mmzone.h and modified for KCL */ +static inline unsigned long zone_managed_pages(struct zone *zone) +{ +#if defined(HAVE_STRUCT_ZONE_MANAGED_PAGES) + return (unsigned long)zone->managed_pages; +#else + /* zone->managed_pages is introduced in v3.7-4152-g9feedc9d831e */ + WARN_ONCE(1, "struct zone->managed_pages don't exist. kernel is a bit old..."); + return 0; +#endif +} +#endif /* HAVE_ZONE_MANAGED_PAGES */ + +#ifndef HAVE_IS_COW_MAPPING +static inline bool is_cow_mapping(vm_flags_t flags) +{ + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; +} +#endif /* HAVE_IS_COW_MAPPING */ + +#ifndef HAVE_VMA_LOOKUP +/** + * vma_lookup() - Find a VMA at a specific address + * @mm: The process address space. + * @addr: The user address. + * + * Return: The vm_area_struct at the given address, %NULL otherwise. 
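+ *
+ * Illustrative lookup under the read lock (mm, addr and handle() are
+ * hypothetical, not part of this header):
+ *
+ *	mmap_read_lock(mm);
+ *	vma = vma_lookup(mm, addr);
+ *	if (vma)
+ *		handle(vma);
+ *	mmap_read_unlock(mm);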
+ */
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (vma && addr < vma->vm_start)
+		vma = NULL;
+
+	return vma;
+}
+#endif /* HAVE_VMA_LOOKUP */
+
+#ifndef VM_ACCESS_FLAGS
+/* Copied from v5.6-12367-g6cb4d9a2870d mm/vma: introduce VM_ACCESS_FLAGS */
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+#endif
+
+#ifndef page_to_virt
+#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
+#endif
+
+#ifndef HAVE_VM_FLAGS_SET
+static inline void vm_flags_set(struct vm_area_struct *vma,
+				vm_flags_t flags)
+{
+#ifdef HAVE_MMAP_ASSERT_WRITE_LOCKED
+	mmap_assert_write_locked(vma->vm_mm);
+#endif
+	vma->vm_flags |= flags;
+}
+
+static inline void vm_flags_clear(struct vm_area_struct *vma,
+				  vm_flags_t flags)
+{
+#ifdef HAVE_MMAP_ASSERT_WRITE_LOCKED
+	mmap_assert_write_locked(vma->vm_mm);
+#endif
+	vma->vm_flags &= ~flags;
+}
+#endif
+
+#ifndef HAVE_WANT_INIT_ON_FREE
+static inline bool want_init_on_free(void)
+{
+	pr_warn_once("legacy kernel without want_init_on_free()\n");
+	return false;
+}
+#endif
+
+#ifndef HAVE_TOTALRAM_PAGES
+extern unsigned long totalram_pages;
+static inline unsigned long _kcl_totalram_pages(void)
+{
+	return totalram_pages;
+}
+#define totalram_pages _kcl_totalram_pages
+#endif /* HAVE_TOTALRAM_PAGES */
+
+/* Copied from include/linux/mm.h */
+#ifndef HAVE_VMA_IS_INITIAL_HEAP
+/*
+ * Indicate if the VMA is a heap for the given task; for
+ * /proc/PID/maps that is the heap of the main task.
+ */
+static inline bool vma_is_initial_heap(const struct vm_area_struct *vma)
+{
+	return vma->vm_start <= vma->vm_mm->brk &&
+	       vma->vm_end >= vma->vm_mm->start_brk;
+}
+
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static inline bool vma_is_initial_stack(const struct vm_area_struct *vma)
+{
+	/*
+	 * We make no effort to guess what a given thread considers to be
+	 * its "stack". It's not even well-defined for programs written
+	 * in languages like Go.
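	 *
	 * Illustrative use (the caller and name variable are hypothetical):
	 *
	 *	if (vma_is_initial_stack(vma))
	 *		name = "[stack]";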
+ */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; +} +#endif + +#ifndef HAVE_FOLLOW_PFN +int _kcl_follow_pfn(struct vm_area_struct *vma, unsigned long address, + unsigned long *pfn); +#define follow_pfn _kcl_follow_pfn +#endif + +#endif /* AMDKCL_MM_H */ diff --git a/include/kcl/kcl_mm_types.h b/include/kcl/kcl_mm_types.h new file mode 100644 index 0000000000000..6cf223e559d02 --- /dev/null +++ b/include/kcl/kcl_mm_types.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_MM_TYPES_H +#define AMDKCL_MM_TYPES_H + +#include +#ifdef HAVE_PFN_T +#include +#else +/* Copied from include/linux/pfn_t.h */ +typedef struct { + u64 val; +} pfn_t; + +#define PFN_FLAGS_MASK (((unsigned long) ~PAGE_MASK) \ + << (BITS_PER_LONG - PAGE_SHIFT)) +#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1)) +#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2)) +#define PFN_DEV (1UL << (BITS_PER_LONG - 3)) +#define PFN_MAP (1UL << (BITS_PER_LONG - 4)) + +static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags) +{ + pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; + + return pfn_t; +} + +static inline unsigned long pfn_t_to_pfn(pfn_t pfn) +{ + return pfn.val & ~PFN_FLAGS_MASK; +} +#endif + +#ifndef HAVE_VMF_INSERT +typedef int vm_fault_t; +#endif + +#endif /* AMDKCL_MM_TYPES_H */ + diff --git a/include/kcl/kcl_mmap_lock.h b/include/kcl/kcl_mmap_lock.h new file mode 100644 index 0000000000000..b677506d80cf1 --- /dev/null +++ b/include/kcl/kcl_mmap_lock.h @@ -0,0 +1,48 @@ +#ifndef KCL_KCL_MMAP_LOCK_H +#define KCL_KCL_MMAP_LOCK_H + +#ifdef HAVE_LINUX_MMAP_LOCK_H +#include +#else +/* Copied from include/linux/mmap_lock.h */ +static inline void mmap_init_lock(struct mm_struct *mm) +{ + init_rwsem(&mm->mmap_sem); +} + +static inline void mmap_write_lock(struct mm_struct *mm) +{ + down_write(&mm->mmap_sem); +} + +static inline bool mmap_write_trylock(struct mm_struct *mm) +{ + return down_write_trylock(&mm->mmap_sem) != 0; +} + +static inline void mmap_write_unlock(struct mm_struct *mm) +{ + up_write(&mm->mmap_sem); +} + +static inline void mmap_write_downgrade(struct mm_struct *mm) +{ + downgrade_write(&mm->mmap_sem); +} + +static inline void mmap_read_lock(struct mm_struct *mm) +{ + down_read(&mm->mmap_sem); +} + +static inline bool mmap_read_trylock(struct mm_struct *mm) +{ + return down_read_trylock(&mm->mmap_sem) != 0; +} + +static inline void mmap_read_unlock(struct mm_struct *mm) +{ + up_read(&mm->mmap_sem); +} +#endif +#endif /* KCL_KCL_MMAP_LOCK_H */ diff --git a/include/kcl/kcl_mmu_notifier.h b/include/kcl/kcl_mmu_notifier.h new file mode 100644 index 0000000000000..eb18197778b02 --- /dev/null +++ b/include/kcl/kcl_mmu_notifier.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_MMU_NOTIFIER_H +#define AMDKCL_MMU_NOTIFIER_H + +#include + +#if !defined(HAVE_MMU_NOTIFIER_RANGE_BLOCKABLE) +/* Copied from v5.1-10225-g4a83bfe916f3 include/linux/mmu_notifier.h */ +#if defined(CONFIG_MMU_NOTIFIER) && \ + defined(HAVE_2ARGS_INVALIDATE_RANGE_START) +static inline bool +mmu_notifier_range_blockable(const struct mmu_notifier_range *range) +{ +/* + * It's for rhel8.5 which has the latest struct mmu_notifier_range + * and no mmu_notifier_range_blockable + */ +#ifdef MMU_NOTIFIER_RANGE_BLOCKABLE + return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); +#else + return range->blockable; +#endif +} +#else +struct mmu_notifier_range; +static inline bool +mmu_notifier_range_blockable(const struct mmu_notifier_range *range) 
+{ + return true; +} +#endif +#endif /* HAVE_MMU_NOTIFIER_RANGE_BLOCKABLE */ + +#endif /* AMDKCL_MMU_NOTIFIER_H */ diff --git a/include/kcl/kcl_mmzone.h b/include/kcl/kcl_mmzone.h new file mode 100644 index 0000000000000..7cd5ea05d5af8 --- /dev/null +++ b/include/kcl/kcl_mmzone.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_MMZONE_H +#define _KCL_MMZONE_H + +#include + +#ifndef __ASSEMBLY__ +#ifndef __GENERATING_BOUNDS_H + +#ifndef MAX_PAGE_ORDER +#define MAX_PAGE_ORDER MAX_ORDER +#endif + +#ifndef NR_PAGE_ORDERS +#define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +#endif + +#endif +#endif + +#endif diff --git a/include/kcl/kcl_mn.h b/include/kcl/kcl_mn.h new file mode 100644 index 0000000000000..f828b5dedec49 --- /dev/null +++ b/include/kcl/kcl_mn.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_MN_H +#define AMDKCL_MN_H + +#include + +/* mmu_notifier_put in the RH DRM backport from 5.6 is broken */ +#if DRM_VER == 5 && DRM_PATCH == 6 && \ + LINUX_VERSION_CODE == KERNEL_VERSION(4, 18, 0) +#undef HAVE_MMU_NOTIFIER_PUT +#endif + +/* Copied from v3.16-6588-gb972216e27d1 include/linux/mmu_notifier.h */ +#if !defined(HAVE_MMU_NOTIFIER_CALL_SRCU) && \ + !defined(HAVE_MMU_NOTIFIER_PUT) +extern void mmu_notifier_call_srcu(struct rcu_head *rcu, + void (*func)(struct rcu_head *rcu)); +extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, + struct mm_struct *mm); +#endif + +#endif /* AMDKCL_MN_H */ diff --git a/include/kcl/kcl_module.h b/include/kcl/kcl_module.h new file mode 100644 index 0000000000000..2265f3bed4091 --- /dev/null +++ b/include/kcl/kcl_module.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Dynamic loading of modules into the kernel. + * + * Rewritten by Richard Henderson Dec 1996 + * Rewritten again by Rusty Russell, 2002 + */ +#ifndef _KCL_KCL_LINUX_MODULE_H_H +#define _KCL_KCL_LINUX_MODULE_H_H + +#include + +/* Copied from v5.3-11739-g3e4d890a26d5 include/linux/module.h */ +#ifndef MODULE_IMPORT_NS +#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#endif + +#endif diff --git a/include/kcl/kcl_moduleparam.h b/include/kcl/kcl_moduleparam.h new file mode 100644 index 0000000000000..e579efe182bbd --- /dev/null +++ b/include/kcl/kcl_moduleparam.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_LINUX_MODULE_PARAMS_H_H +#define _KCL_KCL_LINUX_MODULE_PARAMS_H_H + +#include +#include + +/* Copied from v5.8-rc2-514-g7d8365771ffb include/linux/moduleparam.h */ +#ifndef param_check_hexint +#define _kcl_param_check_hexint +extern const struct kernel_param_ops param_ops_hexint; +extern int param_set_hexint(const char *val, const struct kernel_param *kp); +extern int param_get_hexint(char *buffer, const struct kernel_param *kp); +#define param_check_hexint(name, p) param_check_uint(name, p) +#endif /* param_check_hexint */ + +#ifndef param_check_ullong +#define _kcl_param_check_ullong +extern const struct kernel_param_ops param_ops_ullong; +extern int param_set_ullong(const char *val, const struct kernel_param *kp); +extern int param_get_ullong(char *buffer, const struct kernel_param *kp); +#define param_check_ullong(name, p) __param_check(name, p, unsigned long long) +#endif /* param_check_ullong */ + +#endif diff --git a/include/kcl/kcl_pagemap.h b/include/kcl/kcl_pagemap.h new file mode 100644 index 0000000000000..f95a11d945ebc --- /dev/null +++ b/include/kcl/kcl_pagemap.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_PAGEMAP_H +#define 
AMDKCL_PAGEMAP_H
+
+#include <linux/pagemap.h>
+
+#ifndef HAVE_MM_RELEASE_PAGES_2ARGS
+static inline void _kcl_release_pages(struct page **pages, int nr)
+{
+	release_pages(pages, nr, 0);
+}
+#define release_pages _kcl_release_pages
+#endif
+#endif
diff --git a/include/kcl/kcl_pci.h b/include/kcl/kcl_pci.h
new file mode 100644
index 0000000000000..11dd0f05f5879
--- /dev/null
+++ b/include/kcl/kcl_pci.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * pci.h
+ *
+ * PCI defines and function prototypes
+ * Copyright 1994, Drew Eckhardt
+ * Copyright 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ * PCI Express ASPM defines and function prototypes
+ * Copyright (c) 2007 Intel Corp.
+ * Zhang Yanmin (yanmin.zhang@intel.com)
+ * Shaohua Li (shaohua.li@intel.com)
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI Express Specification
+ * PCI System Design Guide
+ */
+#ifndef AMDKCL_PCI_H
+#define AMDKCL_PCI_H
+
+#include <linux/pci.h>
+#include
+
+#ifndef PCI_EXP_DEVCAP2_ATOMIC_ROUTE
+#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
+#endif
+#ifndef PCI_EXP_DEVCAP2_ATOMIC_COMP32
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP32	0x00000080 /* 32b AtomicOp completion */
+#endif
+#ifndef PCI_EXP_DEVCAP2_ATOMIC_COMP64
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* 64b AtomicOp completion */
+#endif
+#ifndef PCI_EXP_DEVCAP2_ATOMIC_COMP128
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP128	0x00000200 /* 128b AtomicOp completion */
+#endif
+#ifndef PCI_EXP_DEVCTL2_ATOMIC_REQ
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ	0x0040 /* Set Atomic requests */
+#endif
+#ifndef PCI_EXP_DEVCTL2_ATOMIC_BLOCK
+#define PCI_EXP_DEVCTL2_ATOMIC_BLOCK	0x0080 /* Block AtomicOp on egress */
+#endif
+#ifndef PCI_EXP_LNKCTL2_ENTER_COMP
+#define PCI_EXP_LNKCTL2_ENTER_COMP	0x0010 /* Enter Compliance */
+#endif
+#ifndef PCI_EXP_LNKCTL2_TX_MARGIN
+#define PCI_EXP_LNKCTL2_TX_MARGIN	0x0380 /* Transmit Margin */
+#endif
+
+#ifndef PCI_EXP_LNKCTL2_TLS
+#define PCI_EXP_LNKCTL2_TLS		0x000f
+#endif
+#ifndef PCI_EXP_LNKCTL2_TLS_2_5GT
+#define PCI_EXP_LNKCTL2_TLS_2_5GT	0x0001 /* Supported Speed 2.5GT/s */
+#endif
+#ifndef PCI_EXP_LNKCTL2_TLS_5_0GT
+#define PCI_EXP_LNKCTL2_TLS_5_0GT	0x0002 /* Supported Speed 5GT/s */
+#endif
+#ifndef PCI_EXP_LNKCTL2_TLS_8_0GT
+#define PCI_EXP_LNKCTL2_TLS_8_0GT	0x0003 /* Supported Speed 8GT/s */
+#endif
+
+#define PCIE_SPEED_16_0GT 0x17
+#define PCIE_SPEED_32_0GT 0x18
+
+#ifndef PCI_EXP_LNKCAP2_SLS_16_0GB
+#define PCI_EXP_LNKCAP2_SLS_16_0GB	0x00000010 /* Supported Speed 16GT/s */
+#endif
+#ifndef PCI_EXP_LNKCAP_SLS_16_0GB
+#define PCI_EXP_LNKCAP_SLS_16_0GB	0x00000004 /* LNKCAP2 SLS Vector bit 3 */
+#endif
+#ifndef PCI_EXP_LNKSTA_CLS_16_0GB
+#define PCI_EXP_LNKSTA_CLS_16_0GB	0x0004 /* Current Link Speed 16.0GT/s */
+#endif
+
+/* PCIe link information */
+#ifndef PCIE_SPEED2STR
+#define PCIE_SPEED2STR(speed) \
+	((speed) == PCIE_SPEED_16_0GT ? "16 GT/s" : \
+	 (speed) == PCIE_SPEED_8_0GT ? "8 GT/s" : \
+	 (speed) == PCIE_SPEED_5_0GT ? "5 GT/s" : \
+	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
+	 "Unknown speed")
+#endif
+
+/* PCIe speed to Mb/s reduced by encoding overhead */
+#ifndef PCIE_SPEED2MBS_ENC
+#define PCIE_SPEED2MBS_ENC(speed) \
+	((speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \
+	 (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \
+	 (speed) == PCIE_SPEED_5_0GT ? 5000*8/10 : \
+	 (speed) == PCIE_SPEED_2_5GT ?
2500*8/10 : \ + 0) +#endif + +#ifndef PCI_ERROR_RESPONSE +#define PCI_ERROR_RESPONSE (~0ULL) +#define PCI_SET_ERROR_RESPONSE(val) (*(val) = ((typeof(*(val))) PCI_ERROR_RESPONSE)) +#define PCI_POSSIBLE_ERROR(val) ((val) == ((typeof(val)) PCI_ERROR_RESPONSE)) +#endif + +static inline enum pci_bus_speed kcl_pcie_get_speed_cap(struct pci_dev *dev) +{ + return pcie_get_speed_cap(dev); +} + +static inline enum pcie_link_width kcl_pcie_get_width_cap(struct pci_dev *dev) +{ + return pcie_get_width_cap(dev); +} + +/* Copied from v3.12-rc2-29-gc6bde215acfd include/linux/pci.h */ +#if !defined(HAVE_PCI_UPSTREAM_BRIDGE) +static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev) +{ + dev = pci_physfn(dev); + if (pci_is_root_bus(dev->bus)) + return NULL; + + return dev->bus->self; +} +#endif + +#if !defined(HAVE_PCI_CONFIGURE_EXTENDED_TAGS) +void _kcl_pci_configure_extended_tags(struct pci_dev *dev); +#endif + +static inline void kcl_pci_configure_extended_tags(struct pci_dev *dev) +{ +#if !defined(HAVE_PCI_CONFIGURE_EXTENDED_TAGS) + _kcl_pci_configure_extended_tags(dev); +#endif +} + +/* Copied from v5.1-rc1-5-g4e544bac8267 include/linux/pci.h */ +#if !defined(HAVE_PCI_DEV_ID) +static inline u16 pci_dev_id(struct pci_dev *dev) +{ + return PCI_DEVID(dev->bus->number, dev->devfn); +} +#endif /* HAVE_PCI_DEV_ID */ + +#ifndef HAVE_PCI_PR3_PRESENT +#ifdef CONFIG_ACPI +bool _kcl_pci_pr3_present(struct pci_dev *pdev); +static inline bool pci_pr3_present(struct pci_dev *pdev) +{ + return _kcl_pci_pr3_present(pdev); +} +#else +static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } +#endif +#endif /* HAVE_PCI_PR3_PRESENT */ + +#ifndef PCI_EXP_LNKCAP_SLS_8_0GB +#define AMDKCL_CREATE_MEASURE_FILE +#define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */ +int _kcl_pci_create_measure_file(struct pci_dev *pdev); +void _kcl_pci_remove_measure_file(struct pci_dev *pdev); +#endif + +static inline int kcl_pci_create_measure_file(struct pci_dev *pdev) +{ +#ifdef AMDKCL_CREATE_MEASURE_FILE + return _kcl_pci_create_measure_file(pdev); +#else + return 0; +#endif +} + +static inline void kcl_pci_remove_measure_file(struct pci_dev *pdev) +{ +#ifdef AMDKCL_CREATE_MEASURE_FILE + _kcl_pci_remove_measure_file(pdev); +#endif +} + +/* + * v4.18-rc1-3-gb1277a226d8c PCI: Cleanup PCI_REBAR_CTRL_BAR_SHIFT handling + * v4.18-rc1-2-gd3252ace0bc6 PCI: Restore resized BAR state on resume + * v4.14-rc3-3-g8bb705e3e79d PCI: Add pci_resize_resource() for resizing BARs + * v4.14-rc3-2-g276b738deb5b PCI: Add resizable BAR infrastructure + */ +#ifdef PCI_REBAR_CTRL_BAR_SHIFT +#define AMDKCL_ENABLE_RESIZE_FB_BAR + +/* Copied from 192f1bf7559e895d51f81c3976c5892c8b1e0601 include/linux/pci.h */ +#ifndef HAVE_PCI_REBAR_BYTES_TO_SIZE +static inline int pci_rebar_bytes_to_size(u64 bytes) +{ + bytes = roundup_pow_of_two(bytes); + + /* Return BAR size as defined in the resizable BAR specification */ + return max(ilog2(bytes), 20) - 20; +} + +/* + * 907830b0fc9e PCI: Add a REBAR size quirk for Sapphire RX 5600 XT Pulse + * 8fbdbb66f8c1 PCI: Export pci_rebar_get_possible_sizes() + */ +u32 _kcl_pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar); +static inline +u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar) +{ + return _kcl_pci_rebar_get_possible_sizes(pdev, bar); +} +#endif + +#endif /* PCI_REBAR_CTRL_BAR_SHIFT */ + +/* Copied from include/linux/pci.h */ +#ifndef HAVE_PCI_GET_BASE_CLASS +#ifdef CONFIG_PCI +struct pci_dev *pci_get_base_class(unsigned int class, struct pci_dev 
*from); +#else /*CONFIG_PCI*/ +static inline struct pci_dev *pci_get_base_class(unsigned int class, + struct pci_dev *from) +{ return NULL; } +#endif /*CONFIG_PCI*/ +#endif /*HAVE_PCI_GET_BASE_CLASS*/ + +/* Copied from include/linux/pci.h */ +#ifndef PCI_IRQ_INTX +#define PCI_IRQ_INTX PCI_IRQ_LEGACY +#endif + +#endif /* AMDKCL_PCI_H */ diff --git a/include/kcl/kcl_pci_ids.h b/include/kcl/kcl_pci_ids.h new file mode 100644 index 0000000000000..e56bf58438f5b --- /dev/null +++ b/include/kcl/kcl_pci_ids.h @@ -0,0 +1,10 @@ +#ifndef AMDKCL_PCI_IDS_H +#define AMDKCL_PCI_IDS_H + +#include + +#ifndef PCI_CLASS_ACCELERATOR_PROCESSING +#define PCI_CLASS_ACCELERATOR_PROCESSING 0x1200 +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_perf_event.h b/include/kcl/kcl_perf_event.h new file mode 100644 index 0000000000000..b22cbc296b484 --- /dev/null +++ b/include/kcl/kcl_perf_event.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef AMD_KCL_PERF_EVENT_H +#define AMD_KCL_PERF_EVENT_H +#include + +#if !defined(HAVE_PERF_EVENT_UPDATE_USERPAGE) +extern void (*_kcl_perf_event_update_userpage)(struct perf_event *event); +#endif +#endif diff --git a/include/kcl/kcl_pm.h b/include/kcl/kcl_pm.h new file mode 100644 index 0000000000000..37c761718589e --- /dev/null +++ b/include/kcl/kcl_pm.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * pm.h - Power management interface + * + * Copyright (C) 2000 Andrew Henroid + */ +#ifndef KCL_KCL_PM_H +#define KCL_KCL_PM_H + +#include +#include + +/* + * v5.7-rc2-7-ge07515563d01 + * PM: sleep: core: Rename DPM_FLAG_NEVER_SKIP + */ +#ifndef DPM_FLAG_NO_DIRECT_COMPLETE +#define DPM_FLAG_NO_DIRECT_COMPLETE DPM_FLAG_NEVER_SKIP +#endif + + +/* + * v4.15-rc1-1-g0d4b54c6fee8 + * PM / core: Add LEAVE_SUSPENDED driver flag + */ +#ifndef DPM_FLAG_SMART_SUSPEND +#define DPM_FLAG_SMART_SUSPEND BIT(2) +#endif + +/* + * v5.7-rc2-8-g2a3f34750b8b + * PM: sleep: core: Rename DPM_FLAG_LEAVE_SUSPENDED + */ +#ifndef DPM_FLAG_MAY_SKIP_RESUME +#define DPM_FLAG_MAY_SKIP_RESUME BIT(3) +#endif + +#endif diff --git a/include/kcl/kcl_preempt.h b/include/kcl/kcl_preempt.h new file mode 100644 index 0000000000000..cc861beb098ae --- /dev/null +++ b/include/kcl/kcl_preempt.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_PREEMPT_H +#define AMDKCL_PREEMPT_H +#include + +#ifndef in_task +#ifndef PREEMPT_BITS +/* + * We put the hardirq and softirq counter into the preemption + * counter. The bitmask has the following meaning: + * + * - bits 0-7 are the preemption count (max preemption depth: 256) + * - bits 8-15 are the softirq count (max # of softirqs: 256) + * + * The hardirq count could in theory be the same as the number of + * interrupts in the system, but we run all interrupt handlers with + * interrupts disabled, so we cannot have nesting interrupts. Though + * there are a few palaeontologic drivers which reenable interrupts in + * the handler, so we need more than one bit here. 
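+ *
+ * Worked example (illustrative): preempt_count() == 0x00010001 encodes
+ * one hardirq level (bit 16) on top of a single preempt_disable()
+ * (bit 0), so in_task() below evaluates to false; a plain
+ * preempt_disable() section (count 0x00000001) still reports true.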
+ * + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 + * HARDIRQ_MASK: 0x000f0000 + * NMI_MASK: 0x00100000 + * PREEMPT_NEED_RESCHED: 0x80000000 + */ +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define HARDIRQ_BITS 4 +#define NMI_BITS 1 + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) +#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) +#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) +#define NMI_OFFSET (1UL << NMI_SHIFT) + +#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) +#endif + +#define in_task() (!(preempt_count() & \ + (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif + +#ifndef HAVE_MIGRATE_DISABLE +static __always_inline void migrate_disable(void) +{ + preempt_disable(); +} +static __always_inline void migrate_enable(void) +{ + preempt_enable(); +} +#endif /* HAVE_MIGRATE_DISABLE */ + +#endif /* AMDKCL_PREEMPT_H */ diff --git a/include/kcl/kcl_rbtree.h b/include/kcl/kcl_rbtree.h new file mode 100644 index 0000000000000..6a0f687a0801e --- /dev/null +++ b/include/kcl/kcl_rbtree.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef AMDKCL_LINUX_RBTREE_H +#define AMDKCL_LINUX_RBTREE_H + +#include + +#ifndef RB_ROOT_CACHED +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. + */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; + +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } +#define rb_first_cached(root) (root)->rb_leftmost + +static inline struct rb_node * +rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *leftmost = NULL; + + if (root->rb_leftmost == node) + leftmost = root->rb_leftmost = rb_next(node); + + rb_erase(node, &root->rb_root); + + return leftmost; +} + +static inline void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, + bool leftmost) +{ + if (leftmost) + root->rb_leftmost = node; + rb_insert_color(node, &root->rb_root); +} +#endif + +#ifndef HAVE_RB_ADD_CACHED +/* + * The below helper functions use 2 operators with 3 different + * calling conventions. The operators are related like: + * + * comp(a->key,b) < 0 := less(a,b) + * comp(a->key,b) > 0 := less(b,a) + * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) + * + * If these operators define a partial order on the elements we make no + * guarantee on which of the elements matching the key is found. See + * rb_find(). + * + * The reason for this is to allow the find() interface without requiring an + * on-stack dummy object, which might not be feasible due to object size. 
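+ *
+ * An illustrative less() for callers keying on an integer field; the
+ * struct item, item_less() and the tree variable are hypothetical:
+ *
+ *	struct item { struct rb_node node; u64 key; };
+ *
+ *	static bool item_less(struct rb_node *a, const struct rb_node *b)
+ *	{
+ *		return rb_entry(a, struct item, node)->key <
+ *		       rb_entry(b, struct item, node)->key;
+ *	}
+ *
+ *	rb_add_cached(&it->node, &tree, item_less);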
+ */ + +/** + * rb_add_cached() - insert @node into the leftmost cached tree @tree + * @node: node to insert + * @tree: leftmost cached tree to insert @node into + * @less: operator defining the (partial) node order + * + * Returns @node when it is the new leftmost, or NULL. + */ +static __always_inline struct rb_node * +rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, + bool (*less)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + bool leftmost = true; + + while (*link) { + parent = *link; + if (less(node, parent)) { + link = &parent->rb_left; + } else { + link = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); + + return leftmost ? node : NULL; +} +#endif + +#endif diff --git a/include/kcl/kcl_rcupdate.h b/include/kcl/kcl_rcupdate.h new file mode 100644 index 0000000000000..d2b09177e7c56 --- /dev/null +++ b/include/kcl/kcl_rcupdate.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * Copyright IBM Corporation, 2001 + * + * Author: Dipankar Sarma + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * Papers: + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) + * + * For detailed explanation of Read-Copy Update mechanism see - + * http://lse.sourceforge.net/locking/rcupdate.html + * + */ +#ifndef AMDKCL_RCUPDATE_H +#define AMDKCL_RCUPDATE_H + +#include +#include + +#ifndef rcu_pointer_handoff +#define rcu_pointer_handoff(p) (p) +#endif + +#ifndef rcu_replace_pointer +#if defined(rcu_dereference_protected) && defined(rcu_assign_pointer) +/** + * rcu_replace_pointer() - replace an RCU pointer, returning its old value + * @rcu_ptr: RCU pointer, whose old value is returned + * @ptr: regular pointer + * @c: the lockdep conditions under which the dereference will take place + * + * Perform a replacement, where @rcu_ptr is an RCU-annotated + * pointer and @c is the lockdep argument that is passed to the + * rcu_dereference_protected() call used to read that pointer. The old + * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. 
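+ *
+ * A typical writer-side sequence (illustrative; gp, new and g_lock are
+ * hypothetical):
+ *
+ *	old = rcu_replace_pointer(gp, new, lockdep_is_held(&g_lock));
+ *	synchronize_rcu();
+ *	kfree(old);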
+ */
+#define rcu_replace_pointer(rcu_ptr, ptr, c)				\
+({									\
+	typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
+	rcu_assign_pointer((rcu_ptr), (ptr));				\
+	__tmp;								\
+})
+#endif
+#endif
+
+#endif /* AMDKCL_RCUPDATE_H */
diff --git a/include/kcl/kcl_reservation.h b/include/kcl/kcl_reservation.h
new file mode 100644
index 0000000000000..32d6d2b8b7826
--- /dev/null
+++ b/include/kcl/kcl_reservation.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KCL_KCL_RESERVATION_H
+#define KCL_KCL_RESERVATION_H
+
+#include
+
+#ifndef HAVE_LINUX_DMA_RESV_H
+#define reservation_object dma_resv
+#define reservation_object_list dma_resv_list
+#endif
+
+#endif /* KCL_KCL_RESERVATION_H */
diff --git a/include/kcl/kcl_sched_mm.h b/include/kcl/kcl_sched_mm.h
new file mode 100644
index 0000000000000..31e59278e4b5d
--- /dev/null
+++ b/include/kcl/kcl_sched_mm.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KCL_KCL_SCHED_MM_H
+#define _KCL_KCL_SCHED_MM_H
+
+#include
+#include
+#include
+#include
+#include
+
+#ifndef SHRINK_EMPTY
+#define SHRINK_EMPTY (~0UL - 1)
+#define SHRINK_STOP (~0UL)
+#endif
+
+#ifndef HAVE_FS_RECLAIM_ACQUIRE
+#ifdef CONFIG_LOCKDEP
+extern void __fs_reclaim_acquire(void);
+extern void __fs_reclaim_release(void);
+extern void _kcl_fs_reclaim_acquire(gfp_t gfp_mask);
+extern void _kcl_fs_reclaim_release(gfp_t gfp_mask);
+static inline void fs_reclaim_acquire(gfp_t gfp_mask) {
+	return _kcl_fs_reclaim_acquire(gfp_mask);
+}
+static inline void fs_reclaim_release(gfp_t gfp_mask) {
+	return _kcl_fs_reclaim_release(gfp_mask);
+}
+#else
+static inline void __fs_reclaim_acquire(void) { }
+static inline void __fs_reclaim_release(void) { }
+static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
+static inline void fs_reclaim_release(gfp_t gfp_mask) { }
+#endif /* CONFIG_LOCKDEP */
+#endif /* HAVE_FS_RECLAIM_ACQUIRE */
+
+#ifndef HAVE_MEMALLOC_NORECLAIM_SAVE
+static inline unsigned int memalloc_noreclaim_save(void)
+{
+	unsigned int flags = current->flags & PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
+	return flags;
+}
+
+static inline void memalloc_noreclaim_restore(unsigned int flags)
+{
+	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+}
+#endif /* HAVE_MEMALLOC_NORECLAIM_SAVE */
+
+#endif
diff --git a/include/kcl/kcl_seq_file.h b/include/kcl/kcl_seq_file.h
new file mode 100644
index 0000000000000..b884645a14388
--- /dev/null
+++ b/include/kcl/kcl_seq_file.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef AMDKCL_SEQ_FILE_H
+#define AMDKCL_SEQ_FILE_H
+
+#include <linux/seq_file.h>
+
+/* Copied from linux/seq_file.h */
+#ifndef DEFINE_SHOW_ATTRIBUTE
+#define DEFINE_SHOW_ATTRIBUTE(__name)					\
+static int __name ## _open(struct inode *inode, struct file *file)	\
+{									\
+	return single_open(file, __name ## _show, inode->i_private);	\
+}									\
+									\
+static const struct file_operations __name ## _fops = {		\
+	.owner		= THIS_MODULE,					\
+	.open		= __name ## _open,				\
+	.read		= seq_read,					\
+	.llseek		= seq_lseek,					\
+	.release	= single_release,				\
+}
+#endif
+
+#ifndef HAVE_SEQ_HEX_DUMP
+void _kcl_seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
+		       int rowsize, int groupsize, const void *buf, size_t len,
+		       bool ascii);
+
+static inline void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
+				int rowsize, int groupsize, const void *buf, size_t len,
+				bool ascii)
+{
+	_kcl_seq_hex_dump(m, prefix_str, prefix_type, rowsize, groupsize, buf, len, ascii);
+}
+#endif
+#endif
diff --git a/include/kcl/kcl_seqlock.h b/include/kcl/kcl_seqlock.h
new file mode 100644
index 0000000000000..39f2aa7baa4e5
--- /dev/null
+++
b/include/kcl/kcl_seqlock.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_SEQLOCK_H +#define _KCL_KCL_SEQLOCK_H + +#include + +#ifndef write_seqcount_begin +struct ww_mutex; +static __always_inline void +seqcount_ww_mutex_init(seqcount_t *s, struct ww_mutex *lock) +{ + seqcount_init(s); +} + +static inline void _kcl_write_seqcount_begin(seqcount_t *s) +{ + preempt_disable(); + write_seqcount_begin(s); +} +#define write_seqcount_begin _kcl_write_seqcount_begin + +static inline void _kcl_write_seqcount_end(seqcount_t *s) +{ + write_seqcount_end(s); + preempt_enable(); +} +#define write_seqcount_end _kcl_write_seqcount_end +#endif /* write_seqcount_begin */ + +#endif diff --git a/include/kcl/kcl_shrinker.h b/include/kcl/kcl_shrinker.h new file mode 100644 index 0000000000000..ca93cd0197f9f --- /dev/null +++ b/include/kcl/kcl_shrinker.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef AMDKCL_SHRINKER_H +#define AMDKCL_SHRINKER_H + +#ifndef HAVE_SYNCHRONIZE_SHRINKERS +extern void synchronize_shrinkers(void); +#endif + +#ifndef HAVE_SHRINKER_REGISTER +static inline int __printf(2, 3) kcl_register_shrinker(struct shrinker *shrinker, + const char *fmt, ...) +{ +#if defined(HAVE_REGISTER_SHRINKER_WITH_TWO_ARGUMENTS) + return register_shrinker(shrinker, fmt); +#else + return register_shrinker(shrinker); +#endif +} +#endif + +#endif diff --git a/include/kcl/kcl_slab.h b/include/kcl/kcl_slab.h new file mode 100644 index 0000000000000..640242253c588 --- /dev/null +++ b/include/kcl/kcl_slab.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). + * + * (C) SGI 2006, Christoph Lameter + * Cleaned up and restructured to ease the addition of alternative + * implementations of SLAB allocators. + * (C) Linux Foundation 2008-2013 + * Unified interface for all slab allocators + */ +#ifndef AMDKCL_SLAB_H +#define AMDKCL_SLAB_H + +#include +#include + +#ifndef HAVE_KREALLOC_ARRAY +/** + * krealloc_array - reallocate memory for an array. 
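+ *
+ * Illustrative use (arr and n are hypothetical): growing an array is
+ *
+ *	arr = krealloc_array(arr, n * 2, sizeof(*arr), GFP_KERNEL);
+ *
+ * and NULL is returned, with the old buffer left intact, when
+ * n * 2 * sizeof(*arr) would overflow a size_t.
+ *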
+ * @p: pointer to the memory chunk to reallocate + * @new_n: new number of elements to alloc + * @new_size: new size of a single member of the array + * @flags: the type of memory to allocate (see kmalloc) + */ +static __must_check inline void * +krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, flags); +} +#endif + +#ifndef HAVE_KMALLOC_SIZE_ROUNDUP +size_t kmalloc_size_roundup(size_t size); +#endif + +#ifndef HAVE_KVREALLOC +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags); +#endif + +#endif diff --git a/include/kcl/kcl_stddef.h b/include/kcl/kcl_stddef.h new file mode 100644 index 0000000000000..2656ab3239f48 --- /dev/null +++ b/include/kcl/kcl_stddef.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KCL_KCL_STDDEF_H_ +#define _KCL_KCL_STDDEF_H_ + +#include +#ifndef sizeof_field +/** + * sizeof_field() - Report the size of a struct field in bytes + * + * @TYPE: The structure containing the field of interest + * @MEMBER: The field to return the size of + */ +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef DECLARE_FLEX_ARRAY +/** + * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } +#endif + +#endif diff --git a/include/kcl/kcl_string_helpers.h b/include/kcl/kcl_string_helpers.h new file mode 100644 index 0000000000000..e02c0059b3ade --- /dev/null +++ b/include/kcl/kcl_string_helpers.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_STRING_HELPERS_H +#define AMDKCL_STRING_HELPERS_H + +#include +/* Copied from v5.17-rc2-224-gea4692c75e1c linux/string_helpers.h */ + +#ifndef HAVE_STR_YES_NO +static inline const char *str_yes_no(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *str_on_off(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *str_enable_disable(bool v) +{ + return v ? "enable" : "disable"; +} + +static inline const char *str_enabled_disabled(bool v) +{ + return v ? 
"enabled" : "disabled"; +} + +#endif /* HAVE_STR_YES_NO */ +#endif diff --git a/include/kcl/kcl_suspend.h b/include/kcl/kcl_suspend.h new file mode 100644 index 0000000000000..fb2c02994f763 --- /dev/null +++ b/include/kcl/kcl_suspend.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_SUSPEND_H +#define AMDKCL_SUSPEND_H + +#ifndef HAVE_KSYS_SYNC_HELPER +#ifdef CONFIG_PM_SLEEP +extern void _kcl_ksys_sync_helper(void); + +static inline void ksys_sync_helper(void) +{ + _kcl_ksys_sync_helper(); +} +#else +static inline void ksys_sync_helper(void) {} +#endif /* CONFIG_PM_SLEEP */ +#endif /* HAVE_KSYS_SYNC_HELPER */ + +#ifndef HAVE_PM_SUSPEND_VIA_FIRMWARE +static inline bool pm_suspend_via_firmware(void) { return false; } +#endif /* HAVE_PM_SUSPEND_VIA_FIRMWARE */ + +#endif /* AMDKCL_SUSPEND_H */ diff --git a/include/kcl/kcl_sysfs_emit.h b/include/kcl/kcl_sysfs_emit.h new file mode 100644 index 0000000000000..381265a29b7e1 --- /dev/null +++ b/include/kcl/kcl_sysfs_emit.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sysfs.h - definitions for the device driver filesystem + * + * Copyright (c) 2001,2002 Patrick Mochel + * Copyright (c) 2004 Silicon Graphics, Inc. + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007 Tejun Heo + * + * Please see Documentation/filesystems/sysfs.rst for more information. + */ +#include + +#ifndef HAVE_SYSFS_EMIT +#ifdef CONFIG_SYSFS +__printf(2, 3) +int sysfs_emit(char *buf, const char *fmt, ...); + +__printf(3, 4) +int sysfs_emit_at(char *buf, int at, const char *fmt, ...); + +#else +__printf(2, 3) +static inline int sysfs_emit(char *buf, const char *fmt, ...) +{ + return 0; +} + +__printf(3, 4) +static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) +{ + return 0; +} +#endif +#endif diff --git a/include/kcl/kcl_task_barrier.h b/include/kcl/kcl_task_barrier.h new file mode 100644 index 0000000000000..315bff4402dbc --- /dev/null +++ b/include/kcl/kcl_task_barrier.h @@ -0,0 +1,108 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef AMDKCL_DRM_TASK_BARRIER_H +#define AMDKCL_DRM_TASK_BARRIER_H + +#ifdef HAVE_DRM_TASK_BARRIER_H +#include +#else +/* Copied from include/drm/task_barrier.h */ +/* + * Reusable 2 PHASE task barrier (randevouz point) implementation for N tasks. 
+ * Based on the Little book of semaphores - https://greenteapress.com/wp/semaphores/
+ */
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+/*
+ * Represents an instance of a task barrier.
+ */
+struct task_barrier {
+	unsigned int n;
+	atomic_t count;
+	struct semaphore enter_turnstile;
+	struct semaphore exit_turnstile;
+};
+
+static inline void task_barrier_signal_turnstile(struct semaphore *turnstile,
+						 unsigned int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		up(turnstile);
+}
+
+static inline void task_barrier_init(struct task_barrier *tb)
+{
+	tb->n = 0;
+	atomic_set(&tb->count, 0);
+	sema_init(&tb->enter_turnstile, 0);
+	sema_init(&tb->exit_turnstile, 0);
+}
+
+static inline void task_barrier_add_task(struct task_barrier *tb)
+{
+	tb->n++;
+}
+
+static inline void task_barrier_rem_task(struct task_barrier *tb)
+{
+	tb->n--;
+}
+
+/*
+ * Lines up all the threads BEFORE the critical point.
+ *
+ * When all threads have passed this code the entry barrier is back in
+ * its locked state.
+ */
+static inline void task_barrier_enter(struct task_barrier *tb)
+{
+	if (atomic_inc_return(&tb->count) == tb->n)
+		task_barrier_signal_turnstile(&tb->enter_turnstile, tb->n);
+
+	down(&tb->enter_turnstile);
+}
+
+/*
+ * Lines up all the threads AFTER the critical point.
+ *
+ * This function is used to avoid any one thread running ahead if the
+ * barrier is used repeatedly.
+ */
+static inline void task_barrier_exit(struct task_barrier *tb)
+{
+	if (atomic_dec_return(&tb->count) == 0)
+		task_barrier_signal_turnstile(&tb->exit_turnstile, tb->n);
+
+	down(&tb->exit_turnstile);
+}
+
+/* Convenience function when nothing is to be done between entry and exit */
+static inline void task_barrier_full(struct task_barrier *tb)
+{
+	task_barrier_enter(tb);
+	task_barrier_exit(tb);
+}
+#endif
+#endif
diff --git a/include/kcl/kcl_timekeeping.h b/include/kcl/kcl_timekeeping.h
new file mode 100644
index 0000000000000..644228c997baf
--- /dev/null
+++ b/include/kcl/kcl_timekeeping.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KCL_LINUX_TIMEKEEPING_H
+#define _KCL_LINUX_TIMEKEEPING_H
+#include <linux/ktime.h>
+
+#ifndef HAVE_KTIME_GET_NS
+static inline u64 ktime_get_ns(void)
+{
+	return ktime_to_ns(ktime_get());
+}
+#endif
+
+#if !defined(HAVE_KTIME_GET_BOOTTIME_NS)
+#if defined(HAVE_KTIME_GET_NS)
+static inline u64 ktime_get_boottime_ns(void)
+{
+	return ktime_get_boot_ns();
+}
+#else
+static inline u64 ktime_get_boottime_ns(void)
+{
+	struct timespec time;
+
+	get_monotonic_boottime(&time);
+	return (u64)timespec_to_ns(&time);
+}
+#endif /* HAVE_KTIME_GET_NS */
+#endif /* HAVE_KTIME_GET_BOOTTIME_NS */
+
+#if !defined(HAVE_KTIME_GET_RAW_NS)
+static inline u64 ktime_get_raw_ns(void)
+{
+	struct timespec time;
+
+	getrawmonotonic(&time);
+	return (u64)timespec_to_ns(&time);
+}
+#endif
+
+#ifndef HAVE_JIFFIES64_TO_MSECS
+extern u64 jiffies64_to_msecs(u64 j);
+#endif
+
+#endif
diff --git a/include/kcl/kcl_tracepoint.h b/include/kcl/kcl_tracepoint.h
new file mode 100644
index 0000000000000..10eafc91c9486
--- /dev/null
+++ b/include/kcl/kcl_tracepoint.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KCL_TRACEPOINT_H_
+#define _KCL_TRACEPOINT_H_
+
+#include
+
+#ifdef HAVE_ASSIGN_STR_ONE_ARGUMENT
+#define __amdkcl_assign_str(dst, src) __assign_str(dst)
+#else
+#define __amdkcl_assign_str(dst, src) __assign_str(dst, src)
+#endif
+
+#endif
diff --git a/include/kcl/kcl_types.h b/include/kcl/kcl_types.h
new file mode 100644
index 0000000000000..66ff65a627e5a
--- /dev/null
+++ b/include/kcl/kcl_types.h
@@ -0,0 +1,13 @@
+/*
SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_TYPES_H +#define AMDKCL_TYPES_H + +/* Copied from v4.15-rc1-4-g8ced390c2b18 include/uapi/linux/types.h */ +#ifndef HAVE_TYPE__POLL_T +#ifdef __CHECK_POLL +typedef unsigned __bitwise __poll_t; +#else +typedef unsigned __poll_t; +#endif +#endif +#endif diff --git a/include/kcl/kcl_units.h b/include/kcl/kcl_units.h new file mode 100644 index 0000000000000..21d9f45fb1c5e --- /dev/null +++ b/include/kcl/kcl_units.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef KCL_KCL_LINUX_UNITS_H +#define KCL_KCL_LINUX_UNITS_H + +#include + +#ifndef HZ_PER_MHZ +#define HZ_PER_MHZ 1000000UL +#endif + +#endif + diff --git a/include/kcl/kcl_uuid.h b/include/kcl/kcl_uuid.h new file mode 100644 index 0000000000000..be6580926dabc --- /dev/null +++ b/include/kcl/kcl_uuid.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef KCL_KCL_UUID_H +#define KCL_KCL_UUID_H + +#include + +#ifndef HAVE_IMPORT_GUID +static inline void import_guid(guid_t *dst, const __u8 *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + +static inline void export_guid(__u8 *dst, const guid_t *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} +#endif + +#endif \ No newline at end of file diff --git a/include/kcl/kcl_version.h b/include/kcl/kcl_version.h new file mode 100644 index 0000000000000..59a859a26a540 --- /dev/null +++ b/include/kcl/kcl_version.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_VERSION_H +#define AMDKCL_VERSION_H + +#define DRM_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c)) +#define DRM_VERSION_CODE DRM_VERSION(DRM_VER, DRM_PATCH, DRM_SUB) + +#endif diff --git a/include/kcl/kcl_video.h b/include/kcl/kcl_video.h new file mode 100644 index 0000000000000..414cfdc2439a7 --- /dev/null +++ b/include/kcl/kcl_video.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef AMDKCL_VIDEO_H +#define AMDKCL_VIDEO_H + +#include + +#ifndef ACPI_VIDEO_NOTIFY_PROBE +#define ACPI_VIDEO_NOTIFY_PROBE 0x81 +#endif + +#endif/*AMDKCL_VIDEO_H*/ diff --git a/include/kcl/kcl_workqueue.h b/include/kcl/kcl_workqueue.h new file mode 100644 index 0000000000000..345bd0f2cc384 --- /dev/null +++ b/include/kcl/kcl_workqueue.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef KCL_LINUX_WORKQUEUE_H +#define KCL_LINUX_WORKQUEUE_H + +#include + +/* + * System-wide workqueues which are always present. + * + * system_highpri_wq is similar to system_wq but for work items which + * require WQ_HIGHPRI. 
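+ *
+ * Illustrative use (my_work being a hypothetical, already-initialized
+ * work item):
+ *
+ *	queue_work(system_highpri_wq, &my_work);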
+ * + * v3.15-rc1-18-g73e4354444ee workqueue: declare system_highpri_wq + * v3.6-rc1-20-g1aabe902ca36 workqueue: introduce system_highpri_wq + */ +extern struct workqueue_struct *system_highpri_wq; + +#endif diff --git a/include/kcl/reservation.h b/include/kcl/reservation.h new file mode 100644 index 0000000000000..8dcc5e3c18479 --- /dev/null +++ b/include/kcl/reservation.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KCL_RESERVATION_H +#define KCL_RESERVATION_H + +#ifndef HAVE_LINUX_DMA_RESV_H +#include +#endif /* HAVE_LINUX_DMA_RESV_H */ + +#endif diff --git a/include/linux/closure.h b/include/linux/closure.h index 59b8c06b11ff3..2af44427107de 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -159,6 +159,7 @@ struct closure { #ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead #define CLOSURE_MAGIC_ALIVE 0xc054a11e +#define CLOSURE_MAGIC_STACK 0xc05451cc unsigned int magic; struct list_head all; @@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } static inline void closure_init_stack_release(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } /** diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index bdf7f3eddf0a2..4c91a019972b6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -19,6 +19,7 @@ enum fscache_cache_trace; enum fscache_cookie_trace; enum fscache_access_trace; +enum fscache_volume_trace; enum fscache_cache_state { FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ @@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); +extern struct fscache_volume * +fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +extern void fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 4da80e92f804f..278620e063ab2 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path; - if (file->f_mode & FMODE_NONOTIFY) + /* + * FMODE_NONOTIFY are fds generated by fanotify itself which should not + * generate new events. We also don't want to generate events for + * FMODE_PATH fds (involves open & close events) as they are just + * handle creation / destruction events and not "real" file events. 
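	 * For example (illustrative), an fd obtained with open(path, O_PATH)
	 * can never read or write the file, so reporting FAN_OPEN/FAN_CLOSE
	 * for it would describe handle lifetime rather than file activity.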
+ */ + if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) return 0; path = &file->f_path; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6c..d45bfb7cf81d0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2029,7 +2029,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x40]; + + u8 reserved_at_440[0x8]; + u8 max_num_eqs_24b[0x18]; + u8 reserved_at_460[0x3a0]; }; enum mlx5_ifc_flow_destination_type { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 586a8f0104d73..1dc6248feb832 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); + return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 1acf5bac7f503..8c236c651d1d6 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio) static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - bool ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = false; + + rcu_read_lock(); + /* avoid writing to the vmemmap area being remapped */ + if (!page_is_fake_head(page) && page_ref_count(page) != u) + ret = atomic_add_unless(&page->_refcount, nr, u); + rcu_read_unlock(); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); @@ -258,54 +264,9 @@ static inline bool folio_try_get(struct folio *folio) return folio_ref_add_unless(folio, 1, 0); } -static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) -{ -#ifdef CONFIG_TINY_RCU - /* - * The caller guarantees the folio will not be freed from interrupt - * context, so (on !SMP) we only need preemption to be disabled - * and TINY_RCU does that for us. - */ -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); - folio_ref_add(folio, count); -#else - if (unlikely(!folio_ref_add_unless(folio, count, 0))) { - /* Either the folio has been freed, or will be freed. */ - return false; - } -#endif - return true; -} - -/** - * folio_try_get_rcu - Attempt to increase the refcount on a folio. - * @folio: The folio. - * - * This is a version of folio_try_get() optimised for non-SMP kernels. - * If you are still holding the rcu_read_lock() after looking up the - * page and know that the page cannot have its refcount decreased to - * zero in interrupt context, you can use this instead of folio_try_get(). - * - * Example users include get_user_pages_fast() (as pages are not unmapped - * from interrupt context) and the page cache lookups (as pages are not - * truncated from interrupt context). We also know that pages are not - * frozen in interrupt context for the purposes of splitting or migration. - * - * You can also use this function if you're holding a lock that prevents - * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_lock or page table lock for page tables. 
In this case, - * it will always succeed, and you could have used a plain folio_get(), - * but it's sometimes more convenient to have a common function called - * from both locked and RCU-protected contexts. - * - * Return: True if the reference count was successfully incremented. - */ -static inline bool folio_try_get_rcu(struct folio *folio) +static inline bool folio_ref_try_add(struct folio *folio, int count) { - return folio_ref_try_add_rcu(folio, 1); + return folio_ref_add_unless(folio, count, 0); } static inline int page_ref_freeze(struct page *page, int count) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 59f1df0cde5a0..a0a026d2d244d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else -#define MAX_PAGECACHE_ORDER 8 +#define PREFERRED_MAX_PAGECACHE_ORDER 8 #endif +/* + * xas_split_alloc() does not support arbitrary orders. This implies no + * 512MB THP on ARM64 with 64KB base page size. + */ +#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) +#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) + /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The file. diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e0..3be430cf31321 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1122,7 +1122,7 @@ struct phy_driver { u8 index, enum led_brightness value); /** - * @led_blink_set: Set a PHY LED brightness. Index indicates + * @led_blink_set: Set a PHY LED blinking. Index indicates * which of the PHYs led should be configured to blink. Delays * are in milliseconds and if both are zero then a sensible * default should be chosen. The call should adjust the diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6d..a5f4b48fca184 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); #ifdef CONFIG_MEM_ALLOC_PROFILING -static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { swap(current->alloc_tag, tag); return tag; } -static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) { #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 98fdef6e28f2a..67b9a15a5330f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -533,6 +533,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core + * @defer_optimize_message: set to true if controller cannot pre-optimize messages + * and needs to defer the optimization step until the message is actually + * being transferred * * Each SPI controller can communicate with one or more @spi_device * children. 
These make a small bus, sharing MOSI, MISO and SCK signals @@ -776,6 +779,7 @@ struct spi_controller { /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; + bool defer_optimize_message; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) diff --git a/include/linux/swap.h b/include/linux/swap.h index bd450023b9a4f..e685e93ba3547 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page) } /* linux/mm/workingset.c */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset); +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 63424af87bbab..fff820c3e93e3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, const struct rlimit64 __user *new_rlim, struct rlimit64 __user *old_rlim); asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); +#if defined(CONFIG_ARCH_SPLIT_ARG64) +asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, + unsigned int mask_1, unsigned int mask_2, + int dfd, const char __user * pathname); +#else asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); +#endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 21a67dc9efe80..e93ee8d936a9c 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -490,9 +490,16 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) { } #endif + +static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) +{ #ifdef CONFIG_TCG_TPM2_HMAC + return chip->auth; +#else + return NULL; +#endif +} -int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle, u8 *name); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, @@ -504,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, u8 *passphrase, int passphraselen) { - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, - passphraselen); + struct tpm_header *head; + int offset; + + if (tpm2_chip_auth(chip)) { + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); + } else { + offset = buf->handles * 4 + TPM_HEADER_SIZE; + head = (struct tpm_header *)buf->data; + + /* + * If the only sessions are optional, the command tag must change to + * TPM2_ST_NO_SESSIONS. 
+ */ + if (tpm_buf_length(buf) == offset) + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); + } } + +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); @@ -521,56 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_append_name(struct tpm_chip *chip, - struct tpm_buf *buf, - u32 handle, u8 *name) -{ - tpm_buf_append_u32(buf, handle); - /* count the number of handles in the upper bits of flags */ - buf->handles++; -} -static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphraselen) -{ - /* offset tells us where the sessions area begins */ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - u32 len = 9 + passphraselen; - - if (tpm_buf_length(buf) != offset) { - /* not the first session so update the existing length */ - len += get_unaligned_be32(&buf->data[offset]); - put_unaligned_be32(len, &buf->data[offset]); - } else { - tpm_buf_append_u32(buf, len); - } - /* auth handle */ - tpm_buf_append_u32(buf, TPM2_RS_PW); - /* nonce */ - tpm_buf_append_u16(buf, 0); - /* attributes */ - tpm_buf_append_u8(buf, 0); - /* passphrase */ - tpm_buf_append_u16(buf, passphraselen); - tpm_buf_append(buf, passphrase, passphraselen); -} -static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, - u8 *passphrase, - int passphraselen) -{ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - struct tpm_header *head = (struct tpm_header *) buf->data; - - /* - * if the only sessions are optional, the command tag - * must change to TPM2_ST_NO_SESSIONS - */ - if (tpm_buf_length(buf) == offset) - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); -} static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) { diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe932ca3bc8ca..e372a88e8c3f6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -324,6 +324,17 @@ enum { * claim to support it. */ HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, + + /* + * When this quirk is set, the reserved bits of Primary/Secondary_PHY + * inside the LE Extended Advertising Report events are discarded. + * This is required for some Apple/Broadcom controllers which + * abuse these reserved bits for unrelated flags. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. 
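A controller driver would typically opt into the new quirk from its vendor-specific setup path, matching the comment above. A minimal sketch under that assumption; the function is hypothetical, only the quirk flag and the set_bit()-on-hdev->quirks idiom come from the Bluetooth core:

static int example_vendor_setup(struct hci_dev *hdev)
{
	/* Ask the core to discard the abused reserved Primary/Secondary_PHY
	 * bits in LE Extended Advertising Reports from this controller.
	 */
	set_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, &hdev->quirks);
	return 0;
}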
+ */ + HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 6a9d063e9f472..534c3386e714f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk); +int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee5318..45ad37adbe328 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -395,7 +395,7 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_OBSS_PD = 1<<28, BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, - BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, + BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), diff --git a/include/net/tcx.h b/include/net/tcx.h index 72a3e75e539fb..5ce0ce9e0c022 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -13,7 +13,7 @@ struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; - bool miniq_active; + u32 miniq_active; struct rcu_head rcu; }; @@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress) tcx_dec(); } -static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, - const bool active) +static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); - tcx_entry(entry)->miniq_active = active; + tcx_entry(entry)->miniq_active++; +} + +static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) +{ + ASSERT_RTNL(); + tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index fadf406b52605..c978fa2893a53 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2556,9 +2556,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count, TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_to_scan, nr), + TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, nr_to_scan ) @@ -2570,8 +2571,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TP_fast_assign_btrfs(fs_info, __entry->nr_to_scan = nr_to_scan; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", @@ -2581,9 +2582,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_dropped, nr), + TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, 
nr_dropped ) @@ -2595,8 +2597,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, TP_fast_assign_btrfs(fs_info, __entry->nr_dropped = nr_dropped; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index a6190aa1b4060..f1a73aa83fbbf 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -35,12 +35,14 @@ enum fscache_volume_trace { fscache_volume_get_cookie, fscache_volume_get_create_work, fscache_volume_get_hash_collision, + fscache_volume_get_withdraw, fscache_volume_free, fscache_volume_new_acquire, fscache_volume_put_cookie, fscache_volume_put_create_work, fscache_volume_put_hash_collision, fscache_volume_put_relinquish, + fscache_volume_put_withdraw, fscache_volume_see_create_work, fscache_volume_see_hash_wake, fscache_volume_wait_create_work, @@ -120,12 +122,14 @@ enum fscache_access_trace { EM(fscache_volume_get_cookie, "GET cook ") \ EM(fscache_volume_get_create_work, "GET creat") \ EM(fscache_volume_get_hash_collision, "GET hcoll") \ + EM(fscache_volume_get_withdraw, "GET withd") \ EM(fscache_volume_free, "FREE ") \ EM(fscache_volume_new_acquire, "NEW acq ") \ EM(fscache_volume_put_cookie, "PUT cook ") \ EM(fscache_volume_put_create_work, "PUT creat") \ EM(fscache_volume_put_hash_collision, "PUT hcoll") \ EM(fscache_volume_put_relinquish, "PUT relnq") \ + EM(fscache_volume_put_withdraw, "PUT withd") \ EM(fscache_volume_see_create_work, "SEE creat") \ EM(fscache_volume_see_hash_wake, "SEE hwake") \ E_(fscache_volume_wait_create_work, "WAIT crea") diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index efe5de6ce208a..6daa4a9434325 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -32,7 +32,7 @@ #ifndef __AMDGPU_DRM_H__ #define __AMDGPU_DRM_H__ -#include "drm.h" +#include <drm.h> #if defined(__cplusplus) extern "C" { @@ -54,6 +54,11 @@ extern "C" { #define DRM_AMDGPU_VM 0x13 #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 #define DRM_AMDGPU_SCHED 0x15 +/* not upstream */ +#define DRM_AMDGPU_GEM_DGMA 0x5c + +/* hybrid specific ioctls */ +#define DRM_AMDGPU_SEM 0x5b #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) @@ -72,6 +77,8 @@ extern "C" { #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) + /** * DOC: memory domains * @@ -98,6 +105,9 @@ extern "C" { * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for * signalling user mode queues.
*/ +/* hybrid specific ioctls */ +#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) + #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 #define AMDGPU_GEM_DOMAIN_VRAM 0x4 @@ -105,13 +115,17 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 #define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_DGMA 0x400 +#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT 0x800 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA | \ - AMDGPU_GEM_DOMAIN_DOORBELL) + AMDGPU_GEM_DOMAIN_OA |\ + AMDGPU_GEM_DOMAIN_DOORBELL |\ + AMDGPU_GEM_DOMAIN_DGMA |\ + AMDGPU_GEM_DOMAIN_DGMA_IMPORT) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) @@ -174,6 +188,14 @@ extern "C" { /* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ #define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) +/* hybrid specific */ +/* Flag that the memory should be in SPARSE resource */ +#define AMDGPU_GEM_CREATE_SPARSE (1ULL << 29) +/* Flag that the memory allocation should be from top of domain */ +#define AMDGPU_GEM_CREATE_TOP_DOWN (1ULL << 30) +/* Flag that the memory allocation should be pinned */ +#define AMDGPU_GEM_CREATE_NO_EVICT (1ULL << 31) + struct drm_amdgpu_gem_create_in { /** the requested memory size */ __u64 bo_size; @@ -319,6 +341,35 @@ union drm_amdgpu_ctx { union drm_amdgpu_ctx_out out; }; +/* sem related */ +#define AMDGPU_SEM_OP_CREATE_SEM 1 +#define AMDGPU_SEM_OP_WAIT_SEM 2 +#define AMDGPU_SEM_OP_SIGNAL_SEM 3 +#define AMDGPU_SEM_OP_DESTROY_SEM 4 +#define AMDGPU_SEM_OP_IMPORT_SEM 5 +#define AMDGPU_SEM_OP_EXPORT_SEM 6 + +struct drm_amdgpu_sem_in { + /** AMDGPU_SEM_OP_* */ + uint32_t op; + uint32_t handle; + uint32_t ctx_id; + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + uint64_t seq; +}; + +union drm_amdgpu_sem_out { + int32_t fd; + uint32_t handle; +}; + +union drm_amdgpu_sem { + struct drm_amdgpu_sem_in in; + union drm_amdgpu_sem_out out; +}; + /* vm ioctl */ #define AMDGPU_VM_OP_RESERVE_VMID 1 #define AMDGPU_VM_OP_UNRESERVE_VMID 2 @@ -375,6 +426,15 @@ struct drm_amdgpu_gem_userptr { __u32 handle; }; +#define AMDGPU_GEM_DGMA_IMPORT 0 +#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR 1 +struct drm_amdgpu_gem_dgma { + __u64 addr; + __u64 size; + __u32 op; + __u32 handle; +}; + /* SI-CI-VI: */ /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ #define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 @@ -924,6 +984,16 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow { /* query last page fault info */ #define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* Hybrid Stack Specific Defs*/ +/* gpu capability */ +#define AMDGPU_INFO_CAPABILITY 0x50 +/* virtual range */ +#define AMDGPU_INFO_VIRTUAL_RANGE 0x51 +/* query pin memory capability */ +#define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0) +/* query direct gma capability */ +#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1) + #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff #define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 @@ -980,6 +1050,11 @@ struct drm_amdgpu_info { __u32 flags; } read_mmr_reg; + struct { + uint32_t aperture; + uint32_t _pad; + } virtual_range; + struct drm_amdgpu_query_fw query_fw; struct { @@ -1077,6 +1152,8 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_LPDDR4 11 #define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define 
AMDGPU_VRAM_TYPE_HBM_WIDTH 4096 + struct drm_amdgpu_info_device { /** PCI Device ID */ __u32 device_id; @@ -1283,6 +1360,7 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ +#ifndef HAVE_DRM_COLOR_CTM_3X4 /* FIXME wrong namespace! */ struct drm_color_ctm_3x4 { /* @@ -1291,6 +1369,35 @@ struct drm_color_ctm_3x4 { */ __u64 matrix[12]; }; +#endif + +/** + * Definition of System Unified Address (SUA) apertures + */ +#define AMDGPU_SUA_APERTURE_PRIVATE 1 +#define AMDGPU_SUA_APERTURE_SHARED 2 +struct drm_amdgpu_virtual_range { + uint64_t start; + uint64_t end; +}; + +struct drm_amdgpu_capability { + __u32 flag; + __u32 direct_gma_size; +}; + +/* + * Definition of free sync enter and exit signals + * We may have more options in the future + */ +#define AMDGPU_FREESYNC_FULLSCREEN_ENTER 1 +#define AMDGPU_FREESYNC_FULLSCREEN_EXIT 2 + +struct drm_amdgpu_freesync { + __u32 op; /* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */ + /* AMDGPU_FREESYNC_FULLSCREEN_EXIT */ + __u32 spare[7]; +}; #if defined(__cplusplus) } diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index aaed8e12ad0b6..926b1deb11166 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { * Must be 64-bit/8-byte aligned (the size of a CS instruction) * * Can be zero if stream_addr is zero too. + * + * When the stream size is zero, the queue submit serves as a + * synchronization point. */ __u32 stream_size; @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { * ensure the GPU doesn't get garbage when reading the indirect command * stream buffers. If you want the cache flush to happen * unconditionally, pass a zero here. + + * Ignored when stream_size is zero.
*/ __u32 latest_flush; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 71a7ce5f2d4c0..d3b678708d381 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -242,6 +242,17 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_INVALID_FD 0xffffffff +struct kfd_ioctl_dbg_trap_args_deprecated { + __u64 exception_mask; /* to KFD */ + __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ + __u32 pid; /* to KFD */ + __u32 op; /* to KFD */ + __u32 data1; /* to KFD */ + __u32 data2; /* to KFD */ + __u32 data3; /* to KFD */ + __u32 data4; /* to KFD */ +}; + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 @@ -540,26 +551,29 @@ enum kfd_smi_event { KFD_SMI_EVENT_ALL_PROCESS = 64 }; +/* The reason for the page migration event */ enum KFD_MIGRATE_TRIGGERS { - KFD_MIGRATE_TRIGGER_PREFETCH, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - KFD_MIGRATE_TRIGGER_TTM_EVICTION + KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recovery */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recovery */ + KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */ }; +/* The reason for the user queue eviction event */ enum KFD_QUEUE_EVICTION_TRIGGERS { - KFD_QUEUE_EVICTION_TRIGGER_SVM, - KFD_QUEUE_EVICTION_TRIGGER_USERPTR, - KFD_QUEUE_EVICTION_TRIGGER_TTM, - KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, - KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, - KFD_QUEUE_EVICTION_CRIU_RESTORE + KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */ + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */ + KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */ + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */ + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */ + KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */ }; +/* The reason for the unmap-buffer-from-GPU event */ enum KFD_SVM_UNMAP_TRIGGERS { - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, - KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */ + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */ + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */ }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) @@ -570,6 +584,152 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/** + * kfd_ioctl_spm_op - SPM ioctl operations + * + * @KFD_IOCTL_SPM_OP_ACQUIRE: acquire exclusive access to SPM + * @KFD_IOCTL_SPM_OP_RELEASE: release exclusive access to SPM + * @KFD_IOCTL_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming + */ +enum kfd_ioctl_spm_op { + KFD_IOCTL_SPM_OP_ACQUIRE, + KFD_IOCTL_SPM_OP_RELEASE, + KFD_IOCTL_SPM_OP_SET_DEST_BUF +}; + +/** + * kfd_ioctl_spm_args - Arguments for SPM ioctl + * + * @op[in]: specifies the operation to perform + * @gpu_id[in]: GPU ID of the GPU to profile + * @dst_buf[in]: used for the address of the destination buffer + * in @KFD_IOCTL_SPM_OP_SET_DEST_BUF + * @buf_size[in]: size of the destination buffer + * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left + * in the timeout window + * @bytes_copied[out]: amount of data that was copied to the previous dest_buf + * @has_data_loss: boolean indicating whether data was lost + * (e.g. 
due to a ring-buffer overflow) + * + * This ioctl performs different functions depending on the @op parameter. + * + * KFD_IOCTL_SPM_OP_ACQUIRE + * ------------------------ + * + * Acquires exclusive access to SPM on the specified @gpu_id for the calling process. + * This must be called before using KFD_IOCTL_SPM_OP_SET_DEST_BUF. + * + * KFD_IOCTL_SPM_OP_RELEASE + * ------------------------ + * + * Releases exclusive access to SPM on the specified @gpu_id for the calling process, + * which allows another process to acquire it in the future. + * + * KFD_IOCTL_SPM_OP_SET_DEST_BUF + * ----------------------------- + * + * If @dst_buf is NULL, the destination buffer address is unset and copying of counters + * is stopped. + * + * If @dst_buf is not NULL, it specifies the pointer to a new destination buffer. + * @buf_size specifies the size of the buffer. + * + * If @timeout is non-0, the call will wait for up to @timeout ms for the previous + * buffer to be filled. If the previous buffer is filled before the timeout expires, + * @timeout will be updated with the time remaining. If the timeout is exceeded, the function + * copies any partial data available into the previous user buffer and returns success. + * The amount of valid data in the previous user buffer is indicated by @bytes_copied. + * + * If @timeout is 0, the function immediately replaces the previous destination buffer + * without waiting for the previous buffer to be filled. That means the previous buffer + * may only be partially filled, and @bytes_copied will indicate how much data has been + * copied to it. + * + * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0. + * + * Returns negative error code on failure, 0 on success. + */ +struct kfd_ioctl_spm_args { + __u64 dest_buf; + __u32 buf_size; + __u32 op; + __u32 timeout; + __u32 gpu_id; + __u32 bytes_copied; + __u32 has_data_loss; +}; + +/* + * SVM event tracing via SMI system management interface + * + * Open event file descriptor + * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return an anonymous file + * descriptor to receive SMI events. + * If called with sudo permission, the file descriptor can be used to receive + * SVM events from all processes; otherwise it only receives SVM events of the + * same process. + * + * To enable the SVM events + * Write the event file descriptor with a KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap + * mask to start recording the events into the kfifo; use a bitmap mask combination + * for multiple events. A new event mask overwrites the previous one. + * The KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo + * permission to receive SVM events from all processes. + * + * To receive the events + * The application can poll the file descriptor to wait for the events, then read the + * events from the file into a buffer. Each event is a one-line string message, starting + * with the event ID, followed by the event-specific information. + * + * To decode event information + * The following event format string macros can be used with sscanf to decode + * the specific event information. + * event triggers: the reason the event was generated, defined as enums for the unmap, + * eviction and migrate events. + * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory. 
+ * addr: user mode address, in pages + * size: in pages + * pid: the process ID to generate the event + * ns: timestamp in nanosecond-resolution, starts at system boot time but + * stops during suspend + * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update + * rw: 'W' for write page fault, 'R' for read page fault + * rescheduled: 'R' if the queue restore failed and rescheduled to try again + */ +#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ + "%x %s\n", (reset_seq_num), (reset_cause) + +#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\ + "%llx:%llx\n", (bitmask), (counter) + +#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\ + "%x:%s\n", (pid), (task_name) + +#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw) + +#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update) + +#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\ + preferred_loc, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) + +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger) + +#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ + "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) + +#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\ + "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled) + +#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\ + "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ + (node), (unmap_trigger) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * @@ -645,7 +805,6 @@ struct kfd_criu_bo_bucket { /* CRIU IOCTLs - END */ /**************************************************************************************************/ - /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -653,6 +812,39 @@ enum kfd_mmio_remap { KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; +struct kfd_ioctl_ipc_export_handle_args { + __u64 handle; /* to KFD */ + __u32 share_handle[4]; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 flags; /* to KFD */ +}; + +struct kfd_ioctl_ipc_import_handle_args { + __u64 handle; /* from KFD */ + __u64 va_addr; /* to KFD */ + __u64 mmap_offset; /* from KFD */ + __u32 share_handle[4]; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 flags; /* from KFD */ +}; + +struct kfd_ioctl_cross_memory_copy_deprecated_args { + /* to KFD: Process ID of the remote process */ + __u32 pid; + /* to KFD: See above definition */ + __u32 flags; + /* to KFD: Source GPU VM range */ + __u64 src_mem_range_array; + /* to KFD: Size of above array */ + __u64 src_mem_array_size; + /* to KFD: Destination GPU VM range */ + __u64 dst_mem_range_array; + /* to KFD: Size of above array */ + __u64 dst_mem_array_size; + /* from KFD: Total amount of bytes copied */ + __u64 bytes_copied; +}; + /* Guarantee host access to memory */ #define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 /* Fine grained coherency between all devices with access */ @@ -1465,6 +1657,62 @@ struct kfd_ioctl_dbg_trap_args { }; }; +/** + * kfd_ioctl_pc_sample_op - PC Sampling ioctl 
operations + * + * @KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES: Query device PC Sampling capabilities + * @KFD_IOCTL_PCS_OP_CREATE: Register this process with a per-device PC sampler instance + * @KFD_IOCTL_PCS_OP_DESTROY: Unregister from a previously registered PC sampler instance + * @KFD_IOCTL_PCS_OP_START: Process begins taking samples from a previously registered PC sampler instance + * @KFD_IOCTL_PCS_OP_STOP: Process stops taking samples from a previously registered PC sampler instance + */ +enum kfd_ioctl_pc_sample_op { + KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES, + KFD_IOCTL_PCS_OP_CREATE, + KFD_IOCTL_PCS_OP_DESTROY, + KFD_IOCTL_PCS_OP_START, + KFD_IOCTL_PCS_OP_STOP, +}; + +/* Values have to be a power of 2*/ +#define KFD_IOCTL_PCS_FLAG_POWER_OF_2 0x00000001 + +enum kfd_ioctl_pc_sample_method { + KFD_IOCTL_PCS_METHOD_HOSTTRAP = 1, + KFD_IOCTL_PCS_METHOD_STOCHASTIC, +}; + +enum kfd_ioctl_pc_sample_type { + KFD_IOCTL_PCS_TYPE_TIME_US, + KFD_IOCTL_PCS_TYPE_CLOCK_CYCLES, + KFD_IOCTL_PCS_TYPE_INSTRUCTIONS +}; + +struct kfd_pc_sample_info { + __u64 interval; /* [IN] if PCS_TYPE_INTERVAL_US: sample interval in us + * if PCS_TYPE_CLOCK_CYCLES: sample interval in graphics core clk cycles + * if PCS_TYPE_INSTRUCTIONS: sample interval in instructions issued by + * graphics compute units + */ + __u64 interval_min; /* [OUT] */ + __u64 interval_max; /* [OUT] */ + __u64 flags; /* [OUT] indicate potential restrictions e.g FLAG_POWER_OF_2 */ + __u32 method; /* [IN/OUT] kfd_ioctl_pc_sample_method */ + __u32 type; /* [IN/OUT] kfd_ioctl_pc_sample_type */ +}; + +#define KFD_IOCTL_PCS_QUERY_TYPE_FULL (1 << 0) /* If not set, return current */ + +struct kfd_ioctl_pc_sample_args { + __u64 sample_info_ptr; /* array of kfd_pc_sample_info */ + __u32 num_sample_info; + __u32 op; /* kfd_ioctl_pc_sample_op */ + __u32 gpu_id; + __u32 trace_id; + __u32 flags; /* kfd_ioctl_pcs_query flags */ + __u32 version; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -1588,4 +1836,26 @@ struct kfd_ioctl_dbg_trap_args { #define AMDKFD_COMMAND_START 0x01 #define AMDKFD_COMMAND_END 0x27 +/* non-upstream ioctls */ +#define AMDKFD_IOC_IPC_IMPORT_HANDLE \ + AMDKFD_IOWR(0x80, struct kfd_ioctl_ipc_import_handle_args) + +#define AMDKFD_IOC_IPC_EXPORT_HANDLE \ + AMDKFD_IOWR(0x81, struct kfd_ioctl_ipc_export_handle_args) + +#define AMDKFD_IOC_DBG_TRAP_DEPRECATED \ + AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args_deprecated) + +#define AMDKFD_IOC_CROSS_MEMORY_COPY_DEPRECATED \ + AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_deprecated_args) + +#define AMDKFD_IOC_RLC_SPM \ + AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args) + +#define AMDKFD_IOC_PC_SAMPLE \ + AMDKFD_IOWR(0x85, struct kfd_ioctl_pc_sample_args) + +#define AMDKFD_COMMAND_START_2 0x80 +#define AMDKFD_COMMAND_END_2 0x86 + #endif diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h index bd10667542209..c102ef35d11e4 100644 --- a/include/uapi/linux/trace_mmap.h +++ b/include/uapi/linux/trace_mmap.h @@ -43,6 +43,6 @@ struct trace_buffer_meta { __u64 Reserved2; }; -#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1) +#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20) #endif /* _TRACE_MMAP_H_ */ diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f33d914d8f469..91583690bddc5 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,11 +8,14 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct 
fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/io_uring/net.c b/io_uring/net.c index 7c98c4d509463..cf742bdd2a93e 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1127,16 +1127,18 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) flags |= MSG_DONTWAIT; retry_multishot: - kmsg->msg.msg_inq = -1; - kmsg->msg.msg_flags = 0; - if (io_do_buffer_select(req)) { ret = io_recv_buf_select(req, kmsg, &len, issue_flags); - if (unlikely(ret)) + if (unlikely(ret)) { + kmsg->msg.msg_inq = -1; goto out_free; + } sr->buf = NULL; } + kmsg->msg.msg_flags = 0; + kmsg->msg.msg_inq = -1; + if (flags & MSG_WAITALL) min_ret = iov_iter_count(&kmsg->msg.msg_iter); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 976cb258a0edb..c938dea5ddbf3 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -782,8 +782,8 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, nbuckets = max_t(u32, 2, nbuckets); smap->bucket_log = ilog2(nbuckets); - smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), - nbuckets, GFP_USER | __GFP_NOWARN); + smap->buckets = bpf_map_kvcalloc(&smap->map, nbuckets, + sizeof(*smap->buckets), GFP_USER | __GFP_NOWARN); if (!smap->buckets) { err = -ENOMEM; goto free_smap; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 2a69a9a36c0fc..3243c83ef3e39 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1084,7 +1084,10 @@ struct bpf_async_cb { struct bpf_prog *prog; void __rcu *callback_fn; void *value; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct work_struct delete_work; + }; u64 flags; }; @@ -1107,6 +1110,7 @@ struct bpf_async_cb { struct bpf_hrtimer { struct bpf_async_cb cb; struct hrtimer timer; + atomic_t cancelling; }; struct bpf_work { @@ -1219,6 +1223,21 @@ static void bpf_wq_delete_work(struct work_struct *work) kfree_rcu(w, cb.rcu); } +static void bpf_timer_delete_work(struct work_struct *work) +{ + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work); + + /* Cancel the timer and wait for the callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call + * kfree_rcu(t) right after for both preallocated and non-preallocated + * maps. The async->cb = NULL was already done and no code path can see + * address 't' anymore. Any timer that was armed on this bpf_hrtimer before + * bpf_timer_cancel_and_free() will have been cancelled. 
+ */ + hrtimer_cancel(&t->timer); + kfree_rcu(t, cb.rcu); +} + static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, enum bpf_async_type type) { @@ -1262,6 +1281,8 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u clockid = flags & (MAX_CLOCKS - 1); t = (struct bpf_hrtimer *)cb; + atomic_set(&t->cancelling, 0); + INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work); hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); t->timer.function = bpf_timer_cb; cb->value = (void *)async - map->record->timer_off; @@ -1440,7 +1461,8 @@ static void drop_prog_refcnt(struct bpf_async_cb *async) BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) { - struct bpf_hrtimer *t; + struct bpf_hrtimer *t, *cur_t; + bool inc = false; int ret = 0; if (in_nmi()) @@ -1452,14 +1474,41 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) ret = -EINVAL; goto out; } - if (this_cpu_read(hrtimer_running) == t) { + + cur_t = this_cpu_read(hrtimer_running); + if (cur_t == t) { /* If bpf callback_fn is trying to bpf_timer_cancel() * its own timer the hrtimer_cancel() will deadlock - * since it waits for callback_fn to finish + * since it waits for callback_fn to finish. */ ret = -EDEADLK; goto out; } + + /* Only account in-flight cancellations when invoked from a timer + * callback, since we want to avoid waiting only if other _callbacks_ + * are waiting on us, to avoid introducing lockups. Non-callback paths + * are ok, since nobody would synchronously wait for their completion. + */ + if (!cur_t) + goto drop; + atomic_inc(&t->cancelling); + /* Need full barrier after relaxed atomic_inc */ + smp_mb__after_atomic(); + inc = true; + if (atomic_read(&cur_t->cancelling)) { + /* We're cancelling timer t, while some other timer callback is + * attempting to cancel us. In such a case, it might be possible + * that timer t belongs to the other callback, or some other + * callback waiting upon it (creating transitive dependencies + * upon us), and we will enter a deadlock if we continue + * cancelling and waiting for it synchronously, since it might + * do the same. Bail! + */ + ret = -EDEADLK; + goto out; + } +drop: drop_prog_refcnt(&t->cb); out: __bpf_spin_unlock_irqrestore(&timer->lock); @@ -1467,6 +1516,8 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + if (inc) + atomic_dec(&t->cancelling); rcu_read_unlock(); return ret; } @@ -1512,25 +1563,39 @@ void bpf_timer_cancel_and_free(void *val) if (!t) return; - /* Cancel the timer and wait for callback to complete if it was running. - * If hrtimer_cancel() can be safely called it's safe to call kfree(t) - * right after for both preallocated and non-preallocated maps. - * The async->cb = NULL was already done and no code path can - * see address 't' anymore. - * - * Check that bpf_map_delete/update_elem() wasn't called from timer - * callback_fn. In such case don't call hrtimer_cancel() (since it will - * deadlock) and don't call hrtimer_try_to_cancel() (since it will just - * return -1). Though callback_fn is still running on this cpu it's + /* We check that bpf_map_delete/update_elem() was called from timer + * callback_fn. In such case we don't call hrtimer_cancel() (since it + * will deadlock) and don't call hrtimer_try_to_cancel() (since it will + * just return -1). Though callback_fn is still running on this cpu it's * safe to do kfree(t) because bpf_timer_cb() read everything it needed * from 't'. 
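For context, the BPF-side timer API that these helpers.c hunks harden looks roughly like the sketch below; the map layout, section name and license string are illustrative, not part of this patch. The interesting path is bpf_timer_cancel() invoked from timer-callback context: self-cancel already returned -EDEADLK, and the new cancelling counter extends the same bail-out to two callbacks trying to cancel each other's timers:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct elem { struct bpf_timer t; };

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
	/* Cancelling from a timer callback is the guarded path: this can
	 * now fail with -EDEADLK instead of deadlocking in hrtimer_cancel().
	 */
	bpf_timer_cancel(timer);
	return 0;
}

SEC("tp/syscalls/sys_enter_getpid")
int arm_timer(void *ctx)
{
	int key = 0;
	struct elem *e = bpf_map_lookup_elem(&timer_map, &key);

	if (!e)
		return 0;
	bpf_timer_init(&e->t, &timer_map, 1 /* CLOCK_MONOTONIC */);
	bpf_timer_set_callback(&e->t, timer_cb);
	bpf_timer_start(&e->t, 1000000 /* 1ms */, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";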
The bpf subprog callback_fn won't be able to access 't', * since async->cb = NULL was already done. The timer will be * effectively cancelled because bpf_timer_cb() will return * HRTIMER_NORESTART. + * + * However, it is possible that the timer callback_fn calling us armed the + * timer _before_ calling us, such that failing to cancel it here will + * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. + * Therefore, we _need_ to cancel any outstanding timers before we do + * kfree_rcu, even though no more timers can be armed. + * + * Moreover, we need to schedule work even if the timer does not belong to + * the calling callback_fn, as on two different CPUs, we can end up in a + * situation where both sides run in parallel, try to cancel one + * another, and we end up waiting on both sides in hrtimer_cancel + * without making forward progress, since timer1 depends on the timer2 + * callback to finish, and vice versa. + * + * CPU 1 (timer1_cb) CPU 2 (timer2_cb) + * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1) + * + * To avoid these issues, punt to workqueue context when we are in a + * timer callback. */ - if (this_cpu_read(hrtimer_running) != t) - hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + if (this_cpu_read(hrtimer_running)) + queue_work(system_unbound_wq, &t->cb.delete_work); + else + bpf_timer_delete_work(&t->cb.delete_work); } /* This function is called by map_delete/update_elem for individual element and diff --git a/kernel/exit.c b/kernel/exit.c index f95a2c1338a85..81fcee45d6302 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -484,6 +484,8 @@ void mm_update_next_owner(struct mm_struct *mm) * Search through everything else, we should not get here often. */ for_each_process(g) { + if (atomic_read(&mm->mm_users) <= 1) + break; if (g->flags & PF_KTHREAD) continue; for_each_thread(g, c) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bcf2c4cc05227..59ce0841eb1fd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->prev_irq_time += irq_delta; delta -= irq_delta; - psi_account_irqtime(rq->curr, irq_delta); delayacct_irq(rq->curr, irq_delta); #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING @@ -5665,7 +5664,7 @@ void sched_tick(void) { int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); - struct task_struct *curr = rq->curr; + struct task_struct *curr; struct rq_flags rf; unsigned long hw_pressure; u64 resched_latency; @@ -5677,6 +5676,9 @@ void sched_tick(void) rq_lock(rq, &rf); + curr = rq->curr; + psi_account_irqtime(rq, curr, NULL); + update_rq_clock(rq); hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); @@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) ++*switch_count; migrate_disable_switch(rq, prev); + psi_account_irqtime(rq, prev, next); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c75d1307d86d5..9bedd148f0075 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) * The replenish timer needs to be canceled. No * problem if it fires concurrently: boosted threads * are ignored in dl_task_timer(). 
+ * + * If the timer callback was running (hrtimer_try_to_cancel == -1), + * it will eventually call put_task_struct(). */ - hrtimer_try_to_cancel(&p->dl.dl_timer); + if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 && + !dl_server(&p->dl)) + put_task_struct(p); p->dl.dl_throttled = 0; } } else if (!dl_prio(p->normal_prio)) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8a5b1ae0aa55a..24dda708b6993 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env) break; env->loop++; - /* - * We've more or less seen every task there is, call it quits - * unless we haven't found any movable task yet. - */ - if (env->loop > env->loop_max && - !(env->flags & LBF_ALL_PINNED)) + /* We've more or less seen every task there is, call it quits */ + if (env->loop > env->loop_max) break; /* take a breather every nr_migrate tasks */ @@ -11393,9 +11389,7 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq, if (env.flags & LBF_NEED_BREAK) { env.flags &= ~LBF_NEED_BREAK; - /* Stop if we tried all running tasks */ - if (env.loop < busiest->nr_running) - goto more_balance; + goto more_balance; } /* diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7b4aa5809c0f6..507d7b8d79afa 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu, enum psi_states s; u32 state_mask; + lockdep_assert_rq_held(cpu_rq(cpu)); groupc = per_cpu_ptr(group->pcpu, cpu); /* @@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next, } #ifdef CONFIG_IRQ_TIME_ACCOUNTING -void psi_account_irqtime(struct task_struct *task, u32 delta) +void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev) { - int cpu = task_cpu(task); + int cpu = task_cpu(curr); struct psi_group *group; struct psi_group_cpu *groupc; - u64 now; + u64 now, irq; + s64 delta; if (static_branch_likely(&psi_disabled)) return; - if (!task->pid) + if (!curr->pid) + return; + + lockdep_assert_rq_held(rq); + group = task_psi_group(curr); + if (prev && task_psi_group(prev) == group) return; now = cpu_clock(cpu); + irq = irq_time_read(cpu); + delta = (s64)(irq - rq->psi_irq_time); + if (delta < 0) + return; + rq->psi_irq_time = irq; - group = task_psi_group(task); do { if (!group->enabled) continue; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a831af1020700..ef20c61004ebf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1126,6 +1126,7 @@ struct rq { #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; + u64 psi_irq_time; #endif #ifdef CONFIG_PARAVIRT u64 prev_steal_time; diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 38f3698f5e5b3..b02dfc3229510 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -110,8 +110,12 @@ __schedstats_from_se(struct sched_entity *se) void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); -void psi_account_irqtime(struct task_struct *task, u32 delta); - +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev); +#else +static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, + struct task_struct *prev) {} +#endif /*CONFIG_IRQ_TIME_ACCOUNTING */ /* * PSI tracks state that persists across sleeps, such as iowaits and * memory stalls. 
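The psi_account_irqtime() rework above stops receiving a precomputed delta and instead snapshots the cumulative per-CPU IRQ time in the rq. A minimal sketch of that snapshot pattern with illustrative names (rq_snap and irq_delta are not kernel symbols):

#include <stdint.h>

struct rq_snap {
	uint64_t psi_irq_time;	/* last cumulative IRQ time accounted */
};

/* Return only the IRQ time accrued since the last call; the signed
 * comparison mirrors the s64 delta check above, skipping accounting
 * if the counter appears to have gone backwards.
 */
static uint64_t irq_delta(struct rq_snap *rq, uint64_t irq_now)
{
	int64_t delta = (int64_t)(irq_now - rq->psi_irq_time);

	if (delta < 0)
		return 0;
	rq->psi_irq_time = irq_now;
	return (uint64_t)delta;
}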
As a result, it has to distinguish between sleeps, @@ -192,7 +196,8 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {} static inline void psi_sched_switch(struct task_struct *prev, struct task_struct *next, bool sleep) {} -static inline void psi_account_irqtime(struct task_struct *task, u32 delta) {} +static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, + struct task_struct *prev) {} #endif /* CONFIG_PSI */ #ifdef CONFIG_SCHED_INFO diff --git a/lib/Makefile b/lib/Makefile index 3b17690456514..30337431d10e3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -426,3 +426,7 @@ $(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE ifeq ($(CONFIG_FORTIFY_SOURCE),y) $(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG) endif + +# Some architectures define __NO_FORTIFY if __SANITIZE_ADDRESS__ is undefined. +# Pass CFLAGS_KASAN to avoid warnings. +$(foreach x, $(patsubst %.log,%.o,$(TEST_FORTIFY_LOGS)), $(eval KASAN_SANITIZE_$(x) := y)) diff --git a/lib/build_OID_registry b/lib/build_OID_registry index 56d8bafeb848b..8267e8d71338b 100755 --- a/lib/build_OID_registry +++ b/lib/build_OID_registry @@ -38,7 +38,9 @@ close IN_FILE || die; # open C_FILE, ">$ARGV[1]" or die; print C_FILE "/*\n"; -print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n"; +my $scriptname = $0; +$scriptname =~ s#^\Q$abs_srctree/\E##; +print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n"; print C_FILE " */\n"; # diff --git a/lib/closure.c b/lib/closure.c index c971216d9d774..116afae2eed96 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -244,6 +244,9 @@ void closure_debug_destroy(struct closure *cl) { unsigned long flags; + if (cl->magic == CLOSURE_MAGIC_STACK) + return; + BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE); cl->magic = CLOSURE_MAGIC_DEAD; diff --git a/mm/damon/core.c b/mm/damon/core.c index 6392f1cc97a3e..e66823d6b10b0 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1358,14 +1358,31 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, * access frequencies are similar. This is for minimizing the monitoring * overhead under the dynamically changeable access pattern. If a merge was * unnecessarily made, later 'kdamond_split_regions()' will revert it. + * + * The total number of regions could be higher than the user-defined limit, + * max_nr_regions, in some cases. For example, the user can update + * max_nr_regions to a number lower than the current number of regions + * while DAMON is running. For such a case, repeat merging until the limit is + * met while increasing @threshold up to the possible maximum level. */ static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, unsigned long sz_limit) { struct damon_target *t; - - damon_for_each_target(t, c) - damon_merge_regions_of(t, threshold, sz_limit); + unsigned int nr_regions; + unsigned int max_thres; + + max_thres = c->attrs.aggr_interval / + (c->attrs.sample_interval ? 
c->attrs.sample_interval : 1); + do { + nr_regions = 0; + damon_for_each_target(t, c) { + damon_merge_regions_of(t, threshold, sz_limit); + nr_regions += damon_nr_regions(t); + } + threshold = max(1, threshold * 2); + } while (nr_regions > c->attrs.max_nr_regions && + threshold / 2 < max_thres); } /* diff --git a/mm/filemap.c b/mm/filemap.c index 876cc64aadd7c..657bcd887fdb8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1847,7 +1847,7 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index) if (!folio || xa_is_value(folio)) goto out; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto repeat; if (unlikely(folio != xas_reload(&xas))) { @@ -2001,7 +2001,7 @@ static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max, if (!folio || xa_is_value(folio)) return folio; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto reset; if (unlikely(folio != xas_reload(xas))) { @@ -2181,7 +2181,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, if (xa_is_value(folio)) goto update_start; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -2313,7 +2313,7 @@ static void filemap_get_read_batch(struct address_space *mapping, break; if (xa_is_sibling(folio)) break; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) goto retry; if (unlikely(folio != xas_reload(&xas))) @@ -3124,7 +3124,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Use the readahead code, even if readahead is disabled */ - if (vm_flags & VM_HUGEPAGE) { + if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); ra->size = HPAGE_PMD_NR; @@ -3231,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return 0; - ptep = pte_offset_map(vmf->pmd, vmf->address); + ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address, + &vmf->ptl); if (unlikely(!ptep)) return VM_FAULT_NOPAGE; @@ -3472,7 +3473,7 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, continue; if (folio_test_locked(folio)) continue; - if (!folio_try_get_rcu(folio)) + if (!folio_try_get(folio)) continue; /* Has the page moved or been split? */ if (unlikely(folio != xas_reload(xas))) @@ -4248,6 +4249,9 @@ static void filemap_cachestat(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, first_index); struct folio *folio; + /* Flush stats (and potentially sleep) outside the RCU read section. 
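Concrete numbers make the retry bound in the kdamond_merge_regions() hunk above clearer. A worked sketch, assuming sample_interval = 5ms, aggr_interval = 100ms (so max_thres = 20) and an initial threshold of 1; all values are illustrative:

#include <stdio.h>

int main(void)
{
	const unsigned int max_thres = 100000 / 5000;	/* aggr / sample == 20 */
	unsigned int threshold = 1;
	unsigned int pass = 0;

	/* Thresholds tried: 1, 2, 4, 8, 16, 32 -- at most ~log2(max_thres)
	 * extra merge passes even if the region count never drops below
	 * max_nr_regions, because the loop also stops once threshold / 2
	 * reaches max_thres.
	 */
	while (threshold / 2 < max_thres) {
		printf("pass %u: merge with threshold %u\n", ++pass, threshold);
		threshold *= 2;
	}
	return 0;
}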
*/ + mem_cgroup_flush_stats_ratelimited(NULL); + rcu_read_lock(); xas_for_each(&xas, folio, last_index) { int order; @@ -4311,7 +4315,7 @@ static void filemap_cachestat(struct address_space *mapping, goto resched; } #endif - if (workingset_test_recent(shadow, true, &workingset)) + if (workingset_test_recent(shadow, true, &workingset, false)) cs->nr_recently_evicted += nr_pages; goto resched; diff --git a/mm/gup.c b/mm/gup.c index ca0f5cedce9b2..f1d6bc06eb523 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -76,7 +76,7 @@ static inline struct folio *try_get_folio(struct page *page, int refs) folio = page_folio(page); if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; - if (unlikely(!folio_ref_try_add_rcu(folio, refs))) + if (unlikely(!folio_ref_try_add(folio, refs))) return NULL; /* @@ -97,95 +97,6 @@ static inline struct folio *try_get_folio(struct page *page, int refs) return folio; } -/** - * try_grab_folio() - Attempt to get or pin a folio. - * @page: pointer to page to be grabbed - * @refs: the value to (effectively) add to the folio's refcount - * @flags: gup flags: these are the FOLL_* flag values. - * - * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. - * - * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the - * same time. (That's true throughout the get_user_pages*() and - * pin_user_pages*() APIs.) Cases: - * - * FOLL_GET: folio's refcount will be incremented by @refs. - * - * FOLL_PIN on large folios: folio's refcount will be incremented by - * @refs, and its pincount will be incremented by @refs. - * - * FOLL_PIN on single-page folios: folio's refcount will be incremented by - * @refs * GUP_PIN_COUNTING_BIAS. - * - * Return: The folio containing @page (with refcount appropriately - * incremented) for success, or NULL upon failure. If neither FOLL_GET - * nor FOLL_PIN was set, that's considered failure, and furthermore, - * a likely bug in the caller, so a warning is also emitted. - */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) -{ - struct folio *folio; - - if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) - return NULL; - - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) - return NULL; - - if (flags & FOLL_GET) - return try_get_folio(page, refs); - - /* FOLL_PIN is set */ - - /* - * Don't take a pin on the zero page - it's not going anywhere - * and it is used in a *lot* of places. - */ - if (is_zero_page(page)) - return page_folio(page); - - folio = try_get_folio(page, refs); - if (!folio) - return NULL; - - /* - * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a - * right zone, so fail and let the caller fall back to the slow - * path. - */ - if (unlikely((flags & FOLL_LONGTERM) && - !folio_is_longterm_pinnable(folio))) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); - return NULL; - } - - /* - * When pinning a large folio, use an exact count to track it. - * - * However, be sure to *also* increment the normal folio - * refcount field at least once, so that the folio really - * is pinned. That's why the refcount from the earlier - * try_get_folio() is left intact. - */ - if (folio_test_large(folio)) - atomic_add(refs, &folio->_pincount); - else - folio_ref_add(folio, - refs * (GUP_PIN_COUNTING_BIAS - 1)); - /* - * Adjust the pincount before re-checking the PTE for changes. 
- * This is essentially a smp_mb() and is paired with a memory - * barrier in folio_try_share_anon_rmap_*(). - */ - smp_mb__after_atomic(); - - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); - - return folio; -} - static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { @@ -203,58 +114,59 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) } /** - * try_grab_page() - elevate a page's refcount by a flag-dependent amount - * @page: pointer to page to be grabbed - * @flags: gup flags: these are the FOLL_* flag values. + * try_grab_folio() - add a folio's refcount by a flag-dependent amount + * @folio: pointer to folio to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same - * time. Cases: please see the try_grab_folio() documentation, with - * "refs=1". + * time. * * Return: 0 for success, or if no action was required (if neither FOLL_PIN * nor FOLL_GET was set, nothing is done). A negative error code for failure: * - * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not + * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not * be grabbed. + * + * It is called when we have a stable reference for the folio, typically in + * GUP slow path. */ -int __must_check try_grab_page(struct page *page, unsigned int flags) +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags) { - struct folio *folio = page_folio(page); - if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) return -ENOMEM; - if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page))) return -EREMOTEIO; if (flags & FOLL_GET) - folio_ref_inc(folio); + folio_ref_add(folio, refs); else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ - if (is_zero_page(page)) + if (is_zero_folio(folio)) return 0; /* - * Similar to try_grab_folio(): be sure to *also* - * increment the normal page refcount field at least once, + * Increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { - folio_ref_add(folio, 1); - atomic_add(1, &folio->_pincount); + folio_ref_add(folio, refs); + atomic_add(refs, &folio->_pincount); } else { - folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); + folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS); } - node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); } return 0; @@ -515,6 +427,102 @@ static int record_subpages(struct page *page, unsigned long sz, return nr; } + +/** + * try_grab_folio_fast() - Attempt to get or pin a folio in fast path. + * @page: pointer to page to be grabbed + * @refs: the value to (effectively) add to the folio's refcount + * @flags: gup flags: these are the FOLL_* flag values. + * + * "grab" names in this file mean, "look at flags to decide whether to use + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. 
+ * + * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the + * same time. (That's true throughout the get_user_pages*() and + * pin_user_pages*() APIs.) Cases: + * + * FOLL_GET: folio's refcount will be incremented by @refs. + * + * FOLL_PIN on large folios: folio's refcount will be incremented by + * @refs, and its pincount will be incremented by @refs. + * + * FOLL_PIN on single-page folios: folio's refcount will be incremented by + * @refs * GUP_PIN_COUNTING_BIAS. + * + * Return: The folio containing @page (with refcount appropriately + * incremented) for success, or NULL upon failure. If neither FOLL_GET + * nor FOLL_PIN was set, that's considered failure, and furthermore, + * a likely bug in the caller, so a warning is also emitted. + * + * It uses add ref unless zero to elevate the folio refcount and must be called + * in fast path only. + */ +static struct folio *try_grab_folio_fast(struct page *page, int refs, + unsigned int flags) +{ + struct folio *folio; + + /* Raise warn if it is not called in fast GUP */ + VM_WARN_ON_ONCE(!irqs_disabled()); + + if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0)) + return NULL; + + if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page))) + return NULL; + + if (flags & FOLL_GET) + return try_get_folio(page, refs); + + /* FOLL_PIN is set */ + + /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return page_folio(page); + + folio = try_get_folio(page, refs); + if (!folio) + return NULL; + + /* + * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a + * right zone, so fail and let the caller fall back to the slow + * path. + */ + if (unlikely((flags & FOLL_LONGTERM) && + !folio_is_longterm_pinnable(folio))) { + if (!put_devmap_managed_folio_refs(folio, refs)) + folio_put_refs(folio, refs); + return NULL; + } + + /* + * When pinning a large folio, use an exact count to track it. + * + * However, be sure to *also* increment the normal folio + * refcount field at least once, so that the folio really + * is pinned. That's why the refcount from the earlier + * try_get_folio() is left intact. + */ + if (folio_test_large(folio)) + atomic_add(refs, &folio->_pincount); + else + folio_ref_add(folio, + refs * (GUP_PIN_COUNTING_BIAS - 1)); + /* + * Adjust the pincount before re-checking the PTE for changes. + * This is essentially a smp_mb() and is paired with a memory + * barrier in folio_try_share_anon_rmap_*(). 
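The try_grab_folio()/try_grab_folio_fast() split above preserves the existing FOLL_PIN accounting rules; with concrete numbers they look like the sketch below. GUP_PIN_COUNTING_BIAS is 1024 in current kernels; account_pin() is an illustrative helper, not kernel code:

#include <stdbool.h>

#define GUP_PIN_COUNTING_BIAS 1024

/* How a FOLL_PIN of @refs is recorded, mirroring the hunks above. */
static void account_pin(bool large_folio, int refs, int *refcount, int *pincount)
{
	if (large_folio) {
		*refcount += refs;	/* keeps the folio alive */
		*pincount += refs;	/* exact pin count in _pincount */
	} else {
		/* Small folios encode pins in the refcount itself: each
		 * pin adds 1024, so "maybe pinned" is detectable as
		 * refcount >= GUP_PIN_COUNTING_BIAS without a second field.
		 */
		*refcount += refs * GUP_PIN_COUNTING_BIAS;
	}
}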
+ */ + smp_mb__after_atomic(); + + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); + + return folio; +} #endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */ #ifdef CONFIG_ARCH_HAS_HUGEPD @@ -535,7 +543,7 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, */ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { unsigned long pte_end; struct page *page; @@ -558,9 +566,15 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz page = pte_page(pte); refs = record_subpages(page, sz, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); - if (!folio) - return 0; + if (fast) { + folio = try_grab_folio_fast(page, refs, flags); + if (!folio) + return 0; + } else { + folio = page_folio(page); + if (try_grab_folio(folio, refs, flags)) + return 0; + } if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); @@ -588,7 +602,7 @@ static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); @@ -598,7 +612,8 @@ static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); - ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr); + ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr, + fast); if (ret != 1) return ret; } while (ptep++, addr = next, addr != end); @@ -625,7 +640,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, ptep = hugepte_offset(hugepd, addr, pdshift); ptl = huge_pte_lock(h, vma->vm_mm, ptep); ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE, - flags, &page, &nr); + flags, &page, &nr, false); spin_unlock(ptl); if (ret == 1) { @@ -642,7 +657,7 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, - struct page **pages, int *nr) + struct page **pages, int *nr, bool fast) { return 0; } @@ -729,7 +744,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma, gup_must_unshare(vma, flags, page)) return ERR_PTR(-EMLINK); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); else @@ -806,7 +821,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) return ERR_PTR(ret); @@ -968,8 +983,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) && !PageAnonExclusive(page), page); - /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ - ret = try_grab_page(page, flags); + /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. 
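+	 * The slow path already holds a stable reference on the folio
+	 * here, so a single extra reference (refs == 1) is all that is
+	 * requested.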
*/ + ret = try_grab_folio(page_folio(page), 1, flags); if (unlikely(ret)) { page = ERR_PTR(ret); goto out; @@ -1233,7 +1248,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, goto unmap; *page = pte_page(entry); } - ret = try_grab_page(*page, gup_flags); + ret = try_grab_folio(page_folio(*page), 1, gup_flags); if (unlikely(ret)) goto unmap; out: @@ -1636,20 +1651,19 @@ static long __get_user_pages(struct mm_struct *mm, * pages. */ if (page_increm > 1) { - struct folio *folio; + struct folio *folio = page_folio(page); /* * Since we already hold refcount on the * large folio, this should never fail. */ - folio = try_grab_folio(page, page_increm - 1, - foll_flags); - if (WARN_ON_ONCE(!folio)) { + if (try_grab_folio(folio, page_increm - 1, + foll_flags)) { /* * Release the 1st page ref if the * folio is problematic, fail hard. */ - gup_put_folio(page_folio(page), 1, + gup_put_folio(folio, 1, foll_flags); ret = -EFAULT; goto out; @@ -2797,7 +2811,6 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_GUP_FAST - /* * Used in the GUP-fast path to determine whether GUP is permitted to work on * a specific folio. @@ -2962,7 +2975,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) goto pte_unmap; @@ -3049,7 +3062,7 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, break; } - folio = try_grab_folio(page, 1, flags); + folio = try_grab_folio_fast(page, 1, flags); if (!folio) { gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); break; @@ -3138,7 +3151,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, page = pmd_page(orig); refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3182,7 +3195,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, page = pud_page(orig); refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3222,7 +3235,7 @@ static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr, page = pgd_page(orig); refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr); - folio = try_grab_folio(page, refs, flags); + folio = try_grab_folio_fast(page, refs, flags); if (!folio) return 0; @@ -3276,7 +3289,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, * pmd format and THP pmd format */ if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr, - PMD_SHIFT, next, flags, pages, nr) != 1) + PMD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) @@ -3306,7 +3320,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr, - PUD_SHIFT, next, flags, pages, nr) != 1) + PUD_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags, pages, nr)) @@ -3333,7 +3348,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, BUILD_BUG_ON(p4d_leaf(p4d)); if 
(unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr, - P4D_SHIFT, next, flags, pages, nr) != 1) + P4D_SHIFT, next, flags, pages, nr, + true) != 1) return 0; } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) @@ -3362,7 +3378,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end, return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr, - PGDIR_SHIFT, next, flags, pages, nr) != 1) + PGDIR_SHIFT, next, flags, pages, nr, + true) != 1) return; } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index db7946a0a28c4..2120f7478e55c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1331,7 +1331,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); - ret = try_grab_page(page, flags); + ret = try_grab_folio(page_folio(page), 1, flags); if (ret) page = ERR_PTR(ret); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f35abff8be60f..43e1af868cfdc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1625,13 +1625,10 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio, * folio appears as just a compound page. Otherwise, wait until after * allocating vmemmap to clear the flag. * - * A reference is held on the folio, except in the case of demote. - * * Must be called with hugetlb lock held. */ -static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus, - bool demote) +static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1645,6 +1642,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, list_del(&folio->lru); if (folio_test_hugetlb_freed(folio)) { + folio_clear_hugetlb_freed(folio); h->free_huge_pages--; h->free_huge_pages_node[nid]--; } @@ -1661,33 +1659,13 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, if (!folio_test_hugetlb_vmemmap_optimized(folio)) __folio_clear_hugetlb(folio); - /* - * In the case of demote we do not ref count the page as it will soon - * be turned into a page of smaller size. - */ - if (!demote) - folio_ref_unfreeze(folio, 1); - h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, false); -} - -static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, - bool adjust_surplus) -{ - __remove_hugetlb_folio(h, folio, adjust_surplus, true); -} - static void add_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { - int zeroed; int nid = folio_nid(folio); VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); @@ -1711,21 +1689,6 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, */ folio_set_hugetlb_vmemmap_optimized(folio); - /* - * This folio is about to be managed by the hugetlb allocator and - * should have no users. Drop our reference, and check for others - * just in case. - */ - zeroed = folio_put_testzero(folio); - if (unlikely(!zeroed)) - /* - * It is VERY unlikely soneone else has taken a ref - * on the folio. In this case, we simply return as - * free_huge_folio() will be called when this other ref - * is dropped. 
- */ - return; - arch_clear_hugetlb_flags(folio); enqueue_hugetlb_folio(h, folio); } @@ -1762,13 +1725,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, return; } - /* - * Move PageHWPoison flag from head page to the raw error pages, - * which makes any healthy subpages reusable. - */ - if (unlikely(folio_test_hwpoison(folio))) - folio_clear_hugetlb_hwpoison(folio); - /* * If vmemmap pages were allocated above, then we need to clear the * hugetlb flag under the hugetlb lock. @@ -1779,6 +1735,15 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, spin_unlock_irq(&hugetlb_lock); } + /* + * Move PageHWPoison flag from head page to the raw error pages, + * which makes any healthy subpages reusable. + */ + if (unlikely(folio_test_hwpoison(folio))) + folio_clear_hugetlb_hwpoison(folio); + + folio_ref_unfreeze(folio, 1); + /* * Non-gigantic pages demoted from CMA allocated gigantic pages * need to be given back to CMA in free_gigantic_folio. @@ -2197,6 +2162,9 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, nid = numa_mem_id(); retry: folio = __folio_alloc(gfp_mask, order, nid, nmask); + /* Ensure hugetlb folio won't have large_rmappable flag set. */ + if (folio) + folio_clear_large_rmappable(folio); if (folio && !folio_ref_freeze(folio, 1)) { folio_put(folio); @@ -3079,11 +3047,8 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, free_new: spin_unlock_irq(&hugetlb_lock); - if (new_folio) { - /* Folio has a zero ref count, but needs a ref to be freed */ - folio_ref_unfreeze(new_folio, 1); + if (new_folio) update_and_free_hugetlb_folio(h, new_folio, false); - } return ret; } @@ -3938,7 +3903,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - remove_hugetlb_folio_for_demote(h, folio, false); + remove_hugetlb_folio(h, folio, false); spin_unlock_irq(&hugetlb_lock); /* @@ -3952,7 +3917,6 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio) if (rc) { /* Allocation of vmemmmap failed, we can not demote folio */ spin_lock_irq(&hugetlb_lock); - folio_ref_unfreeze(folio, 1); add_hugetlb_folio(h, folio, false); return rc; } diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index b9a55322e52ce..8193906515c6e 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -446,6 +446,8 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!folio_test_hugetlb_vmemmap_optimized(folio)) return 0; @@ -481,6 +483,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h, */ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio) { + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + return __hugetlb_vmemmap_restore_folio(h, folio, 0); } @@ -505,6 +510,9 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, long restored = 0; long ret = 0; + /* avoid writes from page_ref_add_unless() while unfolding vmemmap */ + synchronize_rcu(); + list_for_each_entry_safe(folio, t_folio, folio_list, lru) { if (folio_test_hugetlb_vmemmap_optimized(folio)) { ret = __hugetlb_vmemmap_restore_folio(h, folio, @@ -550,6 +558,8 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h, unsigned long vmemmap_reuse; VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio); + 
VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio); + if (!vmemmap_should_optimize_folio(h, folio)) return ret; @@ -601,6 +611,9 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio) { LIST_HEAD(vmemmap_pages); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0); free_vmemmap_page_list(&vmemmap_pages); } @@ -644,6 +657,9 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l flush_tlb_all(); + /* avoid writes from page_ref_add_unless() while folding vmemmap */ + synchronize_rcu(); + list_for_each_entry(folio, folio_list, lru) { int ret; diff --git a/mm/internal.h b/mm/internal.h index 6902b7dd85091..cc2c5e07fad3b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1182,8 +1182,8 @@ int migrate_device_coherent_page(struct page *page); /* * mm/gup.c */ -struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); -int __must_check try_grab_page(struct page *page, unsigned int flags); +int __must_check try_grab_folio(struct folio *folio, int refs, + unsigned int flags); /* * mm/huge_memory.c diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71fe2a95b8bd3..8f2f1bb18c9cb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7823,17 +7823,6 @@ void mem_cgroup_migrate(struct folio *old, struct folio *new) /* Transfer the charge and the css ref */ commit_charge(new, memcg); - /* - * If the old folio is a large folio and is in the split queue, it needs - * to be removed from the split queue now, in case getting an incorrect - * split queue in destroy_large_folio() after the memcg of the old folio - * is cleared. - * - * In addition, the old folio is about to be freed after migration, so - * removing from the split queue a bit earlier seems reasonable. - */ - if (folio_test_large(old) && folio_test_large_rmappable(old)) - folio_undo_large_rmappable(old); old->memcg_data = 0; } diff --git a/mm/migrate.c b/mm/migrate.c index 20cb9f5f74460..a8c6f466e33ac 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -415,6 +415,15 @@ int folio_migrate_mapping(struct address_space *mapping, if (folio_ref_count(folio) != expected_count) return -EAGAIN; + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && + folio_test_large_rmappable(folio)) { + if (!folio_ref_freeze(folio, expected_count)) + return -EAGAIN; + folio_undo_large_rmappable(folio); + folio_ref_unfreeze(folio, expected_count); + } + /* No turning back from here */ newfolio->index = folio->index; newfolio->mapping = folio->mapping; @@ -433,6 +442,10 @@ int folio_migrate_mapping(struct address_space *mapping, return -EAGAIN; } + /* Take off deferred split queue while frozen and memcg set */ + if (folio_test_large(folio) && folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); + /* * Now we know that no one else is looking at the folio: * no turning back from here. 
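Both mm/migrate.c hunks above detach a large rmappable folio from its deferred split queue while the folio's refcount is frozen and its memcg is still set: the first hunk freezes and unfreezes around the detach itself, the second reuses the freeze that migration already holds at that point. A minimal sketch of the standalone variant follows; detach_deferred_split() is a hypothetical name, since the patch open-codes the idiom at both call sites.

	/*
	 * Sketch only: distills the freeze -> detach -> unfreeze idiom
	 * used above. Hypothetical helper, not part of the patch.
	 */
	static int detach_deferred_split(struct folio *folio, int expected_count)
	{
		if (!folio_test_large(folio) || !folio_test_large_rmappable(folio))
			return 0;

		/* Freeze the refcount to 0 so nobody can grab the folio... */
		if (!folio_ref_freeze(folio, expected_count))
			return -EAGAIN;

		/*
		 * ...unlink it from the deferred split queue while the memcg
		 * (and therefore the right queue) is still known...
		 */
		folio_undo_large_rmappable(folio);

		/* ...and make the folio visible again. */
		folio_ref_unfreeze(folio, expected_count);
		return 0;
	}
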
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 12c9297ed4a7f..8a1c920901292 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -415,13 +415,20 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (bg_thresh >= thresh) - bg_thresh = thresh / 2; tsk = current; if (rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. + */ + if (thresh > UINT_MAX) + thresh = UINT_MAX; + /* This makes sure bg_thresh is within 32-bits as well */ + if (bg_thresh >= thresh) + bg_thresh = thresh / 2; dtc->thresh = thresh; dtc->bg_thresh = bg_thresh; @@ -471,7 +478,11 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) if (rt_task(tsk)) dirty += dirty / 4; - return dirty; + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. + */ + return min_t(unsigned long, dirty, UINT_MAX); } /** @@ -508,10 +519,17 @@ static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; + unsigned long old_bytes = dirty_background_bytes; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (ret == 0 && write) + if (ret == 0 && write) { + if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) > + UINT_MAX) { + dirty_background_bytes = old_bytes; + return -ERANGE; + } dirty_background_ratio = 0; + } return ret; } @@ -537,6 +555,10 @@ static int dirty_bytes_handler(struct ctl_table *table, int write, ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) { + vm_dirty_bytes = old_bytes; + return -ERANGE; + } writeback_set_ratelimit(); vm_dirty_ratio = 0; } @@ -1660,7 +1682,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh); dtc->wb_bg_thresh = dtc->thresh ? 
- div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need diff --git a/mm/readahead.c b/mm/readahead.c index c1b23989d9caf..817b2a352d781 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -503,11 +503,11 @@ void page_cache_ra_order(struct readahead_control *ractl, limit = min(limit, index + ra->size - 1); - if (new_order < MAX_PAGECACHE_ORDER) { + if (new_order < MAX_PAGECACHE_ORDER) new_order += 2; - new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); - new_order = min_t(unsigned int, new_order, ilog2(ra->size)); - } + + new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); + new_order = min_t(unsigned int, new_order, ilog2(ra->size)); /* See comment in page_cache_ra_unbounded() */ nofs = memalloc_nofs_save(); diff --git a/mm/shmem.c b/mm/shmem.c index a8b181a634029..c1befe046c7eb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -541,8 +541,9 @@ static bool shmem_confirm_swap(struct address_space *mapping, static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; -bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) +static bool __shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) { loff_t i_size; @@ -573,6 +574,16 @@ bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, } } +bool shmem_is_huge(struct inode *inode, pgoff_t index, + bool shmem_huge_force, struct mm_struct *mm, + unsigned long vm_flags) +{ + if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER) + return false; + + return __shmem_is_huge(inode, index, shmem_huge_force, mm, vm_flags); +} + #if defined(CONFIG_SYSFS) static int shmem_parse_huge(const char *str) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d0cbdd7c1e5bc..e34ea860153f2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2543,7 +2543,15 @@ static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); static struct xarray * addr_to_vb_xa(unsigned long addr) { - int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus(); + int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids; + + /* + * Please note, nr_cpu_ids points on a highest set + * possible bit, i.e. we never invoke cpumask_next() + * if an index points on it which is nr_cpu_ids - 1. + */ + if (!cpu_possible(index)) + index = cpumask_next(index, cpu_possible_mask); return &per_cpu(vmap_block_queue, index).vmap_blocks; } diff --git a/mm/workingset.c b/mm/workingset.c index c22adb93622a5..a2b28e356e68e 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -412,10 +412,12 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) * @file: whether the corresponding folio is from the file lru. * @workingset: where the workingset value unpacked from shadow should * be stored. + * @flush: whether to flush cgroup rstat. * * Return: true if the shadow is for a recently evicted folio; false otherwise. */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset) +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush) { struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; @@ -467,10 +469,16 @@ bool workingset_test_recent(void *shadow, bool file, bool *workingset) /* * Flush stats (and potentially sleep) outside the RCU read section. 
+ * + * Note that workingset_test_recent() itself might be called in RCU read + * section (for e.g, in cachestat) - these callers need to skip flushing + * stats (via the flush argument). + * * XXX: With per-memcg flushing and thresholding, is ratelimiting * still needed here? */ - mem_cgroup_flush_stats_ratelimited(eviction_memcg); + if (flush) + mem_cgroup_flush_stats_ratelimited(eviction_memcg); eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); refault = atomic_long_read(&eviction_lruvec->nonresident_age); @@ -558,7 +566,7 @@ void workingset_refault(struct folio *folio, void *shadow) mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr); - if (!workingset_test_recent(shadow, file, &workingset)) + if (!workingset_test_recent(shadow, file, &workingset, true)) return; folio_set_active(folio); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0c76dcde53619..080053a85b4d6 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -899,8 +899,8 @@ static int hci_conn_hash_alloc_unset(struct hci_dev *hdev) U16_MAX, GFP_ATOMIC); } -struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, - u8 role, u16 handle) +static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, + u8 role, u16 handle) { struct hci_conn *conn; @@ -1041,7 +1041,16 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, if (unlikely(handle < 0)) return ERR_PTR(-ECONNREFUSED); - return hci_conn_add(hdev, type, dst, role, handle); + return __hci_conn_add(hdev, type, dst, role, handle); +} + +struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, + u8 role, u16 handle) +{ + if (handle > HCI_CONN_HANDLE_MAX) + return ERR_PTR(-EINVAL); + + return __hci_conn_add(hdev, type, dst, role, handle); } static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index dd3b0f5010187..c644b30977bd8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -63,50 +63,6 @@ DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); -static int hci_scan_req(struct hci_request *req, unsigned long opt) -{ - __u8 scan = opt; - - BT_DBG("%s %x", req->hdev->name, scan); - - /* Inquiry and Page scans */ - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - return 0; -} - -static int hci_auth_req(struct hci_request *req, unsigned long opt) -{ - __u8 auth = opt; - - BT_DBG("%s %x", req->hdev->name, auth); - - /* Authentication */ - hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); - return 0; -} - -static int hci_encrypt_req(struct hci_request *req, unsigned long opt) -{ - __u8 encrypt = opt; - - BT_DBG("%s %x", req->hdev->name, encrypt); - - /* Encryption */ - hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); - return 0; -} - -static int hci_linkpol_req(struct hci_request *req, unsigned long opt) -{ - __le16 policy = cpu_to_le16(opt); - - BT_DBG("%s %x", req->hdev->name, policy); - - /* Default link policy */ - hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); - return 0; -} - /* Get HCI device by index. * Device is held on return. 
*/ struct hci_dev *hci_dev_get(int index) @@ -735,6 +691,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) { struct hci_dev *hdev; struct hci_dev_req dr; + __le16 policy; int err = 0; if (copy_from_user(&dr, arg, sizeof(dr))) @@ -761,8 +718,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: - err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, HCI_CMD_TIMEOUT); break; case HCISETENCRYPT: @@ -773,19 +730,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ - err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, + HCI_OP_WRITE_AUTH_ENABLE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); if (err) break; } - err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); break; case HCISETSCAN: - err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE, + 1, &dr.dev_opt, + HCI_CMD_TIMEOUT); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. @@ -795,8 +756,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) break; case HCISETLINKPOL: - err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT, NULL); + policy = cpu_to_le16(dr.dev_opt); + + err = __hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, + 2, &policy, + HCI_CMD_TIMEOUT); break; case HCISETLINKMODE: @@ -2751,7 +2715,11 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + cancel_work_sync(&hdev->rx_work); + cancel_work_sync(&hdev->cmd_work); + cancel_work_sync(&hdev->tx_work); cancel_work_sync(&hdev->power_on); + cancel_work_sync(&hdev->error_reset); hci_cmd_sync_clear(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a487f9df81450..93f7ac905cece 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6311,6 +6311,13 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK; legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type); + + if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, + &hdev->quirks)) { + info->primary_phy &= 0x1f; + info->secondary_phy &= 0x1f; + } + if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, @@ -6660,6 +6667,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, struct bt_iso_qos *qos; bool pending = false; u16 handle = __le16_to_cpu(ev->handle); + u32 c_sdu_interval, p_sdu_interval; bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); @@ -6684,12 +6692,25 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, pending = test_and_clear_bit(HCI_CONN_CREATE_CIS, &conn->flags); - /* Convert ISO Interval (1.25 ms slots) to SDU Interval (us) */ - qos->ucast.in.interval = le16_to_cpu(ev->interval) * 1250; - qos->ucast.out.interval = qos->ucast.in.interval; + /* BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 6, Part G + * page 3075: + * Transport_Latency_C_To_P = CIG_Sync_Delay + (FT_C_To_P) × + * ISO_Interval + SDU_Interval_C_To_P + * ... 
+ * SDU_Interval = (CIG_Sync_Delay + (FT) x ISO_Interval) - + * Transport_Latency + */ + c_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) + + (ev->c_ft * le16_to_cpu(ev->interval) * 1250)) - + get_unaligned_le24(ev->c_latency); + p_sdu_interval = (get_unaligned_le24(ev->cig_sync_delay) + + (ev->p_ft * le16_to_cpu(ev->interval) * 1250)) - + get_unaligned_le24(ev->p_latency); switch (conn->role) { case HCI_ROLE_SLAVE: + qos->ucast.in.interval = c_sdu_interval; + qos->ucast.out.interval = p_sdu_interval; /* Convert Transport Latency (us) to Latency (msec) */ qos->ucast.in.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), @@ -6703,6 +6724,8 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, qos->ucast.out.phy = ev->p_phy; break; case HCI_ROLE_MASTER: + qos->ucast.in.interval = p_sdu_interval; + qos->ucast.out.interval = c_sdu_interval; /* Convert Transport Latency (us) to Latency (msec) */ qos->ucast.out.latency = DIV_ROUND_CLOSEST(get_unaligned_le24(ev->c_latency), @@ -6893,6 +6916,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis = hci_conn_hash_lookup_handle(hdev, handle); if (!bis) { + if (handle > HCI_CONN_HANDLE_MAX) { + bt_dev_dbg(hdev, "ignore too large handle %u", handle); + continue; + } bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, HCI_ROLE_SLAVE, handle); if (IS_ERR(bis)) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a8a7d2b368701..eea34e6a236fd 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -280,6 +280,19 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, } EXPORT_SYMBOL(__hci_cmd_sync_status); +int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + int err; + + hci_req_sync_lock(hdev); + err = __hci_cmd_sync_status(hdev, opcode, plen, param, timeout); + hci_req_sync_unlock(hdev); + + return err; +} +EXPORT_SYMBOL(hci_cmd_sync_status); + static void hci_cmd_sync_work(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_sync_work); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index cc055b952ce69..398fb81f7a134 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1356,8 +1356,7 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); switch (sk->sk_state) { case BT_CONNECT2: - if (pi->conn->hcon && - test_bit(HCI_CONN_PA_SYNC, &pi->conn->hcon->flags)) { + if (test_bit(BT_SK_PA_SYNC, &pi->flags)) { iso_conn_big_sync(sk); sk->sk_state = BT_LISTEN; } else { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index aed025734d047..c3c26bbb5ddae 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6761,6 +6761,8 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, BT_DBG("chan %p, len %d", chan, skb->len); + l2cap_chan_lock(chan); + if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) goto drop; @@ -6777,6 +6779,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, } drop: + l2cap_chan_unlock(chan); l2cap_chan_put(chan); free_skb: kfree_skb(skb); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 6db60946c627c..ba437c6f6ee59 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1239,6 +1239,10 @@ static void l2cap_sock_kill(struct sock *sk) BT_DBG("sk %p state %s", sk, state_to_string(sk->sk_state)); + /* Sock is dead, so set chan data to NULL, avoid other task 
use invalid + * sock pointer. + */ + l2cap_pi(sk)->chan->data = NULL; /* Kill poor orphan */ l2cap_chan_put(l2cap_pi(sk)->chan); @@ -1481,12 +1485,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { - struct sock *sk = chan->data; - struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sock *sk; + struct l2cap_pinfo *pi; int err; - lock_sock(sk); + sk = chan->data; + if (!sk) + return -ENXIO; + pi = l2cap_pi(sk); + lock_sock(sk); if (chan->mode == L2CAP_MODE_ERTM && !list_empty(&pi->rx_busy)) { err = -ENOMEM; goto done; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 1daf95e17d67e..3a5bd1cd1e99f 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -429,7 +429,10 @@ static int is_out(const struct crush_map *map, /** * crush_choose_firstn - choose numrep distinct items of given type * @map: the crush_map + * @work: working space initialized by crush_init_workspace() * @bucket: the bucket we are choose an item from + * @weight: weight vector (for map leaves) + * @weight_max: size of weight vector * @x: crush input value * @numrep: the number of items to choose * @type: the type of item to choose @@ -445,6 +448,7 @@ static int is_out(const struct crush_map *map, * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) * @parent_r: r value passed from the parent + * @choose_args: weights and ids for each known bucket */ static int crush_choose_firstn(const struct crush_map *map, struct crush_work *work, @@ -636,9 +640,8 @@ static int crush_choose_firstn(const struct crush_map *map, } -/** +/* * crush_choose_indep: alternative breadth-first positionally stable mapping - * */ static void crush_choose_indep(const struct crush_map *map, struct crush_work *work, diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f263f7e91a219..ab66b599ac479 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1085,13 +1085,19 @@ static void delayed_work(struct work_struct *work) struct ceph_mon_client *monc = container_of(work, struct ceph_mon_client, delayed_work.work); - dout("monc delayed_work\n"); mutex_lock(&monc->mutex); + dout("%s mon%d\n", __func__, monc->cur_mon); + if (monc->cur_mon < 0) { + goto out; + } + if (monc->hunting) { dout("%s continuing hunt\n", __func__); reopen_session(monc); } else { int is_auth = ceph_auth_is_authenticated(monc->auth); + + dout("%s is_authed %d\n", __func__, is_auth); if (ceph_con_keepalive_expired(&monc->con, CEPH_MONC_PING_TIMEOUT)) { dout("monc keepalive timeout\n"); @@ -1116,6 +1122,8 @@ static void delayed_work(struct work_struct *work) } } __schedule_delayed(monc); + +out: mutex_unlock(&monc->mutex); } @@ -1232,13 +1240,15 @@ EXPORT_SYMBOL(ceph_monc_init); void ceph_monc_stop(struct ceph_mon_client *monc) { dout("stop\n"); - cancel_delayed_work_sync(&monc->delayed_work); mutex_lock(&monc->mutex); __close_session(monc); + monc->hunting = false; monc->cur_mon = -1; mutex_unlock(&monc->mutex); + cancel_delayed_work_sync(&monc->delayed_work); + /* * flush msgr queue before we destroy ourselves to ensure that: * - any work that references our embedded con is finished. 
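The mon_client change above settles on a common teardown ordering: mark the client stopped under the mutex, drop the mutex, and only then cancel the delayed work synchronously, while the work callback re-checks the stopped marker under the same mutex before touching state or re-arming itself (ceph uses monc->cur_mon < 0 as that marker). A condensed sketch of the pattern, with hypothetical names rather than the ceph ones:

	#include <linux/mutex.h>
	#include <linux/workqueue.h>

	struct client {
		struct mutex lock;
		struct delayed_work dwork;
		bool stopped;		/* set once, under lock */
	};

	static void client_work(struct work_struct *work)
	{
		struct client *c = container_of(work, struct client, dwork.work);

		mutex_lock(&c->lock);
		if (c->stopped)
			goto out;	/* torn down: don't touch state or re-arm */

		/* ... periodic work ... */
		schedule_delayed_work(&c->dwork, HZ);
	out:
		mutex_unlock(&c->lock);
	}

	static void client_stop(struct client *c)
	{
		mutex_lock(&c->lock);
		c->stopped = true;
		mutex_unlock(&c->lock);

		/*
		 * Called without the mutex held, so a concurrently running
		 * work item can finish (it sees ->stopped and bails) and
		 * cannot re-arm itself after this returns.
		 */
		cancel_delayed_work_sync(&c->dwork);
	}
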
diff --git a/net/core/datagram.c b/net/core/datagram.c index e614cfd8e14a5..e72dd78471a66 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -416,15 +416,23 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { - struct page *page = skb_frag_page(frag); - u8 *vaddr = kmap(page); + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; if (copy > len) copy = len; - n = INDIRECT_CALL_1(cb, simple_copy_to_iter, - vaddr + skb_frag_off(frag) + offset - start, - copy, data, to); - kunmap(page); + + n = 0; + skb_frag_foreach_page(frag, + skb_frag_off(frag) + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_local_page(p); + n += INDIRECT_CALL_1(cb, simple_copy_to_iter, + vaddr + p_off, p_len, data, to); + kunmap_local(vaddr); + } + offset += n; if (n != copy) goto short_copy; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fd20aae30be23..bbf40b9997138 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -434,7 +434,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, page = sg_page(sge); if (copied + copy > len) copy = len - copied; - copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (copy) + copy = copy_page_to_iter(page, sge->offset, copy, iter); if (!copy) { copied = copied ? copied : -EFAULT; goto out; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e645d751a5e89..223dcd25d88a2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1306,7 +1306,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE) return -EINVAL; - if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + if (rxfh.input_xfrm != RXH_XFRM_NO_CHANGE && + (rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index b2de2108b356a..34d76e87847d0 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -37,6 +37,8 @@ static int linkstate_get_sqi(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi(phydev); mutex_unlock(&phydev->lock); @@ -55,6 +57,8 @@ static int linkstate_get_sqi_max(struct net_device *dev) mutex_lock(&phydev->lock); if (!phydev->drv || !phydev->drv->get_sqi_max) ret = -EOPNOTSUPP; + else if (!phydev->link) + ret = -ENETDOWN; else ret = phydev->drv->get_sqi_max(phydev); mutex_unlock(&phydev->lock); @@ -62,6 +66,17 @@ static int linkstate_get_sqi_max(struct net_device *dev) return ret; }; +static bool linkstate_sqi_critical_error(int sqi) +{ + return sqi < 0 && sqi != -EOPNOTSUPP && sqi != -ENETDOWN; +} + +static bool linkstate_sqi_valid(struct linkstate_reply_data *data) +{ + return data->sqi >= 0 && data->sqi_max >= 0 && + data->sqi <= data->sqi_max; +} + static int linkstate_get_link_ext_state(struct net_device *dev, struct linkstate_reply_data *data) { @@ -93,12 +108,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, data->link = __ethtool_get_link(dev); ret = linkstate_get_sqi(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; data->sqi = ret; ret = linkstate_get_sqi_max(dev); - if (ret < 0 && ret != -EOPNOTSUPP) + if (linkstate_sqi_critical_error(ret)) goto out; data->sqi_max = ret; @@ -136,11 
+151,10 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base, len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; - if (data->sqi != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); - - if (data->sqi_max != -EOPNOTSUPP) - len += nla_total_size(sizeof(u32)); + if (linkstate_sqi_valid(data)) { + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI */ + len += nla_total_size(sizeof(u32)); /* LINKSTATE_SQI_MAX */ + } if (data->link_ext_state_provided) len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ @@ -164,13 +178,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; - if (data->sqi != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) - return -EMSGSIZE; + if (linkstate_sqi_valid(data)) { + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; - if (data->sqi_max != -EOPNOTSUPP && - nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) - return -EMSGSIZE; + if (nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, + data->sqi_max)) + return -EMSGSIZE; + } if (data->link_ext_state_provided) { if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7adace541fe29..9712cdb8087c2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1383,6 +1383,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; @@ -1398,6 +1399,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.sdiag_family = rc->idiag_family; req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); req.idiag_ext = rc->idiag_ext; + req.pad = 0; req.idiag_states = rc->idiag_states; req.id = rc->id; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e39cb881e209..38da23f991d60 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2129,8 +2129,16 @@ void tcp_clear_retrans(struct tcp_sock *tp) static inline void tcp_init_undo(struct tcp_sock *tp) { tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ - tp->undo_retrans = tp->retrans_out ? : -1; + /* First, account for regular retransmits in flight: */ + tp->undo_retrans = tp->retrans_out; + /* Next, account for TLP retransmits in flight: */ + if (tp->tlp_high_seq && tp->tlp_retrans) + tp->undo_retrans++; + /* Finally, avoid 0, because undo_retrans==0 means "can undo now": */ + if (!tp->undo_retrans) + tp->undo_retrans = -1; } static bool tcp_is_rack(const struct sock *sk) @@ -2209,6 +2217,7 @@ void tcp_enter_loss(struct sock *sk) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tcp_ecn_queue_cwr(tp); /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous @@ -3077,7 +3086,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, return; if (tcp_try_undo_dsack(sk)) - tcp_try_keep_open(sk); + tcp_try_to_open(sk, flag); tcp_identify_packet_loss(sk, ack_flag); if (icsk->icsk_ca_state != TCP_CA_Recovery) { @@ -4223,6 +4232,13 @@ void tcp_parse_options(const struct net *net, * checked (see tcp_v{4,6}_rcv()). */ break; +#endif +#ifdef CONFIG_TCP_AO + case TCPOPT_AO: + /* TCP AO has already been checked + * (see tcp_inbound_ao_hash()). 
+ */ + break; #endif case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index e93df98de3f45..b01eb6d944139 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -619,6 +619,7 @@ static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, + [TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, }, /* Following attributes are not received for GET/DEL, * we keep them for reference */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5bfd76a31af6d..892c86657fbc2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -483,15 +483,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk, const struct sk_buff *skb, u32 rtx_delta) { + const struct inet_connection_sock *icsk = inet_csk(sk); + u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout); const struct tcp_sock *tp = tcp_sk(sk); - const int timeout = TCP_RTO_MAX * 2; + int timeout = TCP_RTO_MAX * 2; s32 rcv_delta; + if (user_timeout) { + /* If user application specified a TCP_USER_TIMEOUT, + * it does not want win 0 packets to 'reset the timer' + * while retransmits are not making progress. + */ + if (rtx_delta > user_timeout) + return true; + timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout)); + } /* Note: timer interrupt might have been delayed by at least one jiffy, * and tp->rcv_tstamp might very well have been written recently. * rcv_delta can thus be negative. */ - rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; + rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; @@ -536,8 +547,6 @@ void tcp_retransmit_timer(struct sock *sk) if (WARN_ON_ONCE(!skb)) return; - tp->tlp_high_seq = 0; - if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. 
Our retransmits diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 189c9113fe9a1..578668878a85b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -326,6 +326,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, goto fail_unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); + sk_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -342,7 +344,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, hslot2->count++; spin_unlock(&hslot2->lock); } - sock_set_flag(sk, SOCK_RCU_FREE); + error = 0; fail_unlock: spin_unlock_bh(&hslot->lock); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1132dea0e290e..0965ad11ec747 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -423,6 +423,7 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_ERP_SLOT; } +/* context: requires softirqs disabled */ void ieee80211_handle_queued_frames(struct ieee80211_local *local) { struct sk_buff *skb; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 963ed75deb765..771c05640aa3a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1567,7 +1567,9 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, void ieee80211_stop_device(struct ieee80211_local *local) { + local_bh_disable(); ieee80211_handle_queued_frames(local); + local_bh_enable(); ieee80211_led_radio(local, false); ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 9ab7396668d22..21b7c3b280b45 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -161,8 +161,10 @@ void ieee802154_configure_durations(struct wpan_phy *phy, } phy->symbol_duration = duration; - phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; - phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC; + phy->lifs_period = + (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; + phy->sifs_period = + (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_USEC; } EXPORT_SYMBOL(ieee802154_configure_durations); @@ -184,10 +186,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy) * Should be done when all drivers sets this value. 
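	 * (Worked example, assuming the 2.4 GHz O-QPSK PHY: with a
	 * 16000 ns symbol duration, LIFS = 40 symbols and SIFS = 12
	 * symbols come out to 640 us and 192 us after the
	 * NSEC_PER_USEC division below.)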
*/ - wpan_phy->lifs_period = - (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000; - wpan_phy->sifs_period = - (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000; + wpan_phy->lifs_period = (IEEE802154_LIFS_PERIOD * + wpan_phy->symbol_duration) / NSEC_PER_USEC; + wpan_phy->sifs_period = (IEEE802154_SIFS_PERIOD * + wpan_phy->symbol_duration) / NSEC_PER_USEC; } int ieee802154_register_hw(struct ieee802154_hw *hw) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 2a6f1ed763c9b..6fbed5bb5c3e0 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -34,8 +34,8 @@ void ieee802154_xmit_sync_worker(struct work_struct *work) if (res) goto err_tx; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + DEV_STATS_INC(dev, tx_packets); + DEV_STATS_ADD(dev, tx_bytes, skb->len); ieee802154_xmit_complete(&local->hw, skb, false); @@ -90,8 +90,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) if (ret) goto err_wake_netif_queue; - dev->stats.tx_packets++; - dev->stats.tx_bytes += len; + DEV_STATS_INC(dev, tx_packets); + DEV_STATS_ADD(dev, tx_bytes, len); } else { local->tx_skb = skb; queue_work(local->workqueue, &local->sync_tx_work); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e8dcf41d360d9..91cc3a81ba8f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3823,6 +3823,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; @@ -3844,6 +3853,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue; + /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ err = expr->ops->validate(ctx, expr, &data); if (err < 0) return err; @@ -10809,150 +10821,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); -/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. 
- */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nft_check_loops(const struct nft_ctx *ctx, - const struct nft_set_ext *ext) -{ - const struct nft_data *data; - int ret; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - ret = nf_tables_check_loops(ctx, data->verdict.chain); - break; - default: - ret = 0; - break; - } - - return ret; -} - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_elem_priv *elem_priv) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - - if (!nft_set_elem_active(ext, iter->genmask)) - return 0; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - return nft_check_loops(ctx, ext); -} - -static int nft_set_catchall_loops(const struct nft_ctx *ctx, - struct nft_set *set) -{ - u8 genmask = nft_genmask_next(ctx->net); - struct nft_set_elem_catchall *catchall; - struct nft_set_ext *ext; - int ret = 0; - - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) - continue; - - ret = nft_check_loops(ctx, ext); - if (ret < 0) - return ret; - } - - return ret; -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - if (fatal_signal_pending(current)) - return -EINTR; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - break; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (!iter.err) - iter.err = nft_set_catchall_loops(ctx, set); - - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -11065,7 +10933,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; } @@ -11483,8 +11351,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) - 
nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f1c31757e4969..55e28e1da66ec 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) hooks = nf_hook_entries_head(net, pf, entry->state.hook); i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + if (!hooks || i >= hooks->num_hook_entries) { kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); nf_queue_entry_free(entry); return; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2a96d9c1db65b..6fa3cca87d346 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1077,6 +1077,14 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ if (nf_conntrack_confirm(skb) != NF_ACCEPT) goto drop; + + /* The ct may be dropped if a clash has been resolved, + * so it's necessary to retrieve it from skb again to + * prevent UAF. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + skip_add = true; } if (!skip_add) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index c2ef9dcf91d2d..cc6051d4f2ef8 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -91,7 +91,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -121,7 +121,7 @@ static void ingress_destroy(struct Qdisc *sch) tcf_block_put_ext(q->block, sch, &q->block_info); if (entry) { - tcx_miniq_set_active(entry, false); + tcx_miniq_dec(entry); if (!tcx_entry_is_active(entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(entry); @@ -257,7 +257,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, true, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_ingress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, true); @@ -276,7 +276,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, entry = tcx_entry_fetch_or_create(dev, false, &created); if (!entry) return -ENOMEM; - tcx_miniq_set_active(entry, true); + tcx_miniq_inc(entry); mini_qdisc_pair_init(&q->miniqp_egress, sch, &tcx_entry(entry)->miniq); if (created) tcx_entry_update(dev, entry, false); @@ -302,7 +302,7 @@ static void clsact_destroy(struct Qdisc *sch) tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); if (ingress_entry) { - tcx_miniq_set_active(ingress_entry, false); + tcx_miniq_dec(ingress_entry); if (!tcx_entry_is_active(ingress_entry)) { tcx_entry_update(dev, NULL, true); tcx_entry_free(ingress_entry); @@ -310,7 +310,7 @@ static void clsact_destroy(struct Qdisc *sch) } if (egress_entry) { - tcx_miniq_set_active(egress_entry, false); + tcx_miniq_dec(egress_entry); if (!tcx_entry_is_active(egress_entry)) { tcx_entry_update(dev, NULL, false); tcx_entry_free(egress_entry); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfc353eea8eda..0e1691316f423 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2441,6 +2441,13 @@ static 
void xs_tcp_setup_socket(struct work_struct *work) transport->srcport = 0; status = -EAGAIN; break; + case -EPERM: + /* Happens, for instance, if a BPF program is preventing + * the connect. Remap the error so upper layers can better + * deal with it. + */ + status = -ECONNREFUSED; + fallthrough; case -EINVAL: /* Happens, for instance, if the user specified a link * local IPv6 address without a scope-id. diff --git a/net/unix/garbage.c b/net/unix/garbage.c index dfe94a90ece40..23efb78fe9ef4 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -476,6 +476,7 @@ static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_inde } if (vertex->index == vertex->scc_index) { + struct unix_vertex *v; struct list_head scc; bool scc_dead = true; @@ -486,15 +487,15 @@ static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_inde */ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry); - list_for_each_entry_reverse(vertex, &scc, scc_entry) { + list_for_each_entry_reverse(v, &scc, scc_entry) { /* Don't restart DFS from this vertex in unix_walk_scc(). */ - list_move_tail(&vertex->entry, &unix_visited_vertices); + list_move_tail(&v->entry, &unix_visited_vertices); /* Mark vertex as off-stack. */ - vertex->index = unix_vertex_grouped_index; + v->index = unix_vertex_grouped_index; if (scc_dead) - scc_dead = unix_vertex_dead(vertex); + scc_dead = unix_vertex_dead(v); } if (scc_dead) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3c0bca4238d35..72c7bf5585816 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -468,6 +468,10 @@ static const struct netlink_range_validation nl80211_punct_bitmap_range = { .max = 0xffff, }; +static const struct netlink_range_validation q_range = { + .max = INT_MAX, +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -754,7 +758,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 }, - [NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 }, + [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range), [NL80211_ATTR_HE_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa, NL80211_HE_MAX_CAPABILITY_LEN), diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh index a78b804b680cf..b9513d224476f 100755 --- a/scripts/ld-version.sh +++ b/scripts/ld-version.sh @@ -57,9 +57,11 @@ else fi fi -# Some distributions append a package release number, as in 2.34-4.fc32 -# Trim the hyphen and any characters that follow. -version=${version%-*} +# There may be something after the version, such as a distribution's package +# release number (like Fedora's "2.34-4.fc32") or punctuation (like LLD briefly +# added before the "compatible with GNU linkers" string), so remove everything +# after the leading run of digits and periods.
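The "%%[!0-9.]*" expansion on the next line deletes the longest suffix that starts with any character other than a digit or a period, so only the leading run of version digits and dots survives. A standalone shell sketch with made-up inputs (illustrative only, not part of the patch):

    version="2.34-4.fc32"
    echo "${version%%[!0-9.]*}"    # prints 2.34
    version="17.0.6 (compatible with GNU linkers)"
    echo "${version%%[!0-9.]*}"    # prints 17.0.6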
+version=${version%%[!0-9.]*} cversion=$(get_canonical_version $version) min_cversion=$(get_canonical_version $min_version) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index fffc8af8deb17..c52d517b93647 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -83,7 +83,6 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA done if [ -d "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" ];then - echo "/lib/modules/%{KERNELRELEASE}/dtb" find "%{buildroot}/lib/modules/%{KERNELRELEASE}/dtb" -printf "%%%ghost /boot/dtb-%{KERNELRELEASE}/%%P\n" fi diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index abdd22007ed83..e4a79a9b2d588 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -427,8 +427,6 @@ static void __init remove_securityfs_measurement_lists(struct dentry **lists) kfree(lists); } - - securityfs_measurement_list_count = 0; } static int __init create_securityfs_measurement_lists(void) @@ -625,6 +623,7 @@ int __init ima_fs_init(void) securityfs_remove(binary_runtime_measurements); remove_securityfs_measurement_lists(ascii_securityfs_measurement_lists); remove_securityfs_measurement_lists(binary_securityfs_measurement_lists); + securityfs_measurement_list_count = 0; securityfs_remove(ima_symlink); securityfs_remove(ima_dir); diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 51998d1c72ff1..80c816922f788 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -128,8 +128,8 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B7", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, - { "17AA38C7", 4, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT }, 0, 2, -1, 1000, 4500, 24 }, - { "17AA38C8", 4, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT }, 0, 2, -1, 1000, 4500, 24 }, + { "17AA38C7", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 }, + { "17AA38C8", 4, INTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, CS35L41_RIGHT, CS35L41_LEFT }, 0, 2, -1, 1000, 4500, 24 }, { "17AA38F9", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, { "17AA38FA", 2, EXTERNAL, { CS35L41_RIGHT, CS35L41_LEFT, 0, 0 }, 0, 2, -1, 0, 0, 0 }, {} diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 811e82474200f..766f0b1d3e9d6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10053,6 +10053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84a6, "HP 250 G7 Notebook PC", ALC269_FIXUP_HP_LINE1_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), @@ -10383,6 +10384,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1845, 
"Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), + SND_PCI_QUIRK(0x10ec, 0x11bc, "VAIO VJFE-IL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x10ec, 0x1230, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), @@ -10480,6 +10482,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -10655,6 +10658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 8ca8bcd177ab4..dfda6bb5c6f8b 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -38,7 +38,9 @@ static bool rt711_readable_register(struct device *dev, unsigned int reg) case 0x8300 ... 0x83ff: case 0x9c00 ... 0x9cff: case 0xb900 ... 
0xb9ff: + case 0x752008: case 0x752009: + case 0x75200b: case 0x752011: case 0x75201a: case 0x752045: diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index c61d298ea6b3a..1c823f9eea570 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -617,12 +617,6 @@ static int hda_dai_suspend(struct hdac_bus *bus) sdai = swidget->private; ops = sdai->platform_private; - ret = hda_link_dma_cleanup(hext_stream->link_substream, - hext_stream, - cpu_dai); - if (ret < 0) - return ret; - /* for consistency with TRIGGER_SUSPEND */ if (ops->post_trigger) { ret = ops->post_trigger(sdev, cpu_dai, @@ -631,6 +625,12 @@ static int hda_dai_suspend(struct hdac_bus *bus) if (ret < 0) return ret; } + + ret = hda_link_dma_cleanup(hext_stream->link_substream, + hext_stream, + cpu_dai); + if (ret < 0) + return ret; } } diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c index 9fb8521b896ba..f6e24edd7adbe 100644 --- a/sound/soc/sof/intel/hda-pcm.c +++ b/sound/soc/sof/intel/hda-pcm.c @@ -258,6 +258,12 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev, snd_pcm_hw_constraint_integer(substream->runtime, SNDRV_PCM_HW_PARAM_PERIODS); + /* Limit the maximum number of periods to not exceed the BDL entries count */ + if (runtime->hw.periods_max > HDA_DSP_MAX_BDL_ENTRIES) + snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->hw.periods_min, + HDA_DSP_MAX_BDL_ENTRIES); + /* Only S16 and S32 supported by HDA hardware when used without DSP */ if (sdev->dspless_mode_selected) snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT, diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 233f2b6edf520..49b79cf0c5cc5 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str) return result; } -static int comm_str__cmp(const void *_lhs, const void *_rhs) -{ - const struct comm_str *lhs = *(const struct comm_str * const *)_lhs; - const struct comm_str *rhs = *(const struct comm_str * const *)_rhs; - - return strcmp(comm_str__str(lhs), comm_str__str(rhs)); -} - static int comm_str__search(const void *_key, const void *_member) { const char *key = _key; @@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str) } result = comm_str__new(str); if (result) { - comm_strs->strs[comm_strs->num_strs++] = result; - qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *), - comm_str__cmp); + int low = 0, high = comm_strs->num_strs - 1; + int insert = comm_strs->num_strs; /* Default to inserting at the end. 
*/ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); + comm_strs->num_strs++; + comm_strs->strs[insert] = result; } } up_write(&comm_strs->lock); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index ab3d0c01dd639..a69a9c6612008 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso) dsos->dsos = temp; dsos->allocated = to_allocate; } - dsos->dsos[dsos->cnt++] = dso__get(dso); - if (dsos->cnt >= 2 && dsos->sorted) { - dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2], - &dsos->dsos[dsos->cnt - 1]) - <= 0; + if (!dsos->sorted) { + dsos->dsos[dsos->cnt++] = dso__get(dso); + } else { + int low = 0, high = dsos->cnt - 1; + int insert = dsos->cnt; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], + (dsos->cnt - insert) * sizeof(struct dso *)); + dsos->cnt++; + dsos->dsos[insert] = dso__get(dso); } dso__set_dsos(dso, dsos); return 0; }
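Both hunks above replace an append that then needed a full qsort() (comm.c), or that could drop the sorted flag entirely (dsos.c), with a lower-bound binary search followed by a single memmove, so each insertion keeps the array sorted at O(log n) comparisons plus one shift. A minimal standalone sketch of the same technique; the names are illustrative, not from the patch, and the caller is assumed to have grown the array beforehand:

    #include <string.h>

    /* Insert key into the sorted array arr[0..*num), keeping it sorted.
     * The loop finds the first slot whose element compares >= key
     * (lower bound); if every element is smaller, insert stays at *num.
     */
    static void sorted_insert(const char **arr, int *num, const char *key)
    {
            int low = 0, high = *num - 1;
            int insert = *num;      /* Default to inserting at the end. */

            while (low <= high) {
                    int mid = low + (high - low) / 2;

                    if (strcmp(arr[mid], key) < 0) {
                            low = mid + 1;
                    } else {
                            high = mid - 1;
                            insert = mid;
                    }
            }
            /* Shift the tail up by one slot and drop the key in place. */
            memmove(&arr[insert + 1], &arr[insert],
                    (*num - insert) * sizeof(*arr));
            arr[insert] = key;
            (*num)++;
    }

The low/high/insert bookkeeping mirrors the two hunks exactly: when the loop ends, insert is the index of the first element not less than the key, which is also where the patch places the new comm_str or dso.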
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 3482248aa3442..90d5afd52dd06 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + struct device *dev = &port->dev; if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; + dev_set_drvdata(dev, cxlhdm); return cxlhdm; } diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index eeabd798bc3ae..98b6b6a886ce3 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,9 +58,12 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_SKBMOD=y +CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index bc98411446855..1af9ec1149aab 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -9,6 +9,8 @@ #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" #include "test_tc_link.skel.h" + +#include "netlink_helpers.h" #include "tc_helpers.h" void serial_test_tc_links_basic(void) @@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void) test_tc_links_ingress(BPF_TCX_INGRESS, false, false); } +struct qdisc_req { + struct nlmsghdr n; + struct tcmsg t; + char buf[1024]; +}; + +static int qdisc_replace(int ifindex, const char *kind, bool block) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct qdisc_req req; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWQDISC; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = 0xfffffff1; + + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); + if (block) + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + +void serial_test_tc_links_dev_chain0(void) +{ + int err, ifindex; + + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); + ifindex = if_nametoindex("foo"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + err = qdisc_replace(ifindex, "ingress", true); + if (!ASSERT_OK(err, "attaching ingress")) + goto cleanup; + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); + err = qdisc_replace(ifindex, "clsact", false); + if (!ASSERT_OK(err, "attaching clsact")) + goto cleanup; + /* Heuristic: kern_sync_rcu() alone does not work; without the fix, a + * ~5s wait reliably (100% of the time) reproduced the issue. + */ + sleep(5); + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); +cleanup: + ASSERT_OK(system("ip link del dev foo"), "del veth"); + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); +} + static void test_tc_links_dev_mixed(int target) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 0000000000000..871d16cb95cfd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time.
+ */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 0000000000000..3e520133281ea --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f1..40723a6a083f4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include #include #include -#include -#include #include "kselftest.h" @@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif -/* Wait for the child process to end but without sharing memory mapping.
*/ -static inline pid_t clone3_vfork(void) -{ - struct clone_args args = { - .flags = CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return syscall(__NR_clone3, &args, sizeof(args)); -} - /** * TH_LOG() * @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \ - child = clone3_vfork(); \ + child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. */ \ @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action; - int status; + /* + * Sets status so that WIFEXITED(status) returns true and + * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value + * should never be evaluated because of the waitpid(2) check and + * SIGALRM handling. + */ + int status = KSFT_FAIL << 8; + int child; if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout); - waitpid(t->pid, &status, 0); + child = waitpid(t->pid, &status, 0); + if (child == -1 && errno != EINTR) { + t->exit_code = KSFT_FAIL; + fprintf(TH_LOG_STREAM, + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); + return; + } + alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL; @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else { + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name, @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic; + int child; /* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr); - t->pid = clone3_vfork(); - if (t->pid < 0) { + child = fork(); + if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL; - } else if (t->pid == 0) { + } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else { + t->pid = child; __wait_for_test(t); } ksft_print_msg(" %4s %s\n", diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 2bfed46e0b19a..d663362565803 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -14,12 +14,12 @@ FIXTURE(scm_rights) { - int fd[16]; + int fd[32]; }; FIXTURE_VARIANT(scm_rights) { - char name[16]; + char name[32]; int type; int flags; bool test_listener; @@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata, const FIXTURE_VARIANT(scm_rights) *variant, int n) { + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + if (variant->test_listener) create_listeners(_metadata, self, n); else @@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge) close_sockets(8); } +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index bdc03a2097e85..7ea5fb28c93db 
100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -85,6 +85,7 @@ static bool cfg_rx; static int cfg_runtime_ms = 4200; static int cfg_verbose; static int cfg_waittime_ms = 500; +static int cfg_notification_limit = 32; static bool cfg_zerocopy; static socklen_t cfg_alen; @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; static int zerocopied = -1; static uint32_t next_completion; +static uint32_t sends_since_notify; static unsigned long gettimeofday_ms(void) { @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) error(1, errno, "send"); if (cfg_verbose && ret != len) fprintf(stderr, "send: ret=%u != %u\n", ret, len); + sends_since_notify++; if (len) { packets++; @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain) /* Detect notification gaps. These should not happen often, if at all. * Gaps can occur due to drops, reordering and retransmissions. */ - if (lo != next_completion) + if (cfg_verbose && lo != next_completion) fprintf(stderr, "gap: %u..%u does not append to %u\n", lo, hi, next_completion); next_completion = hi + 1; @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain) static void do_recv_completions(int fd, int domain) { while (do_recv_completion(fd, domain)) {} + sends_since_notify = 0; } /* Wait for all remaining completions on the errqueue */ @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol) else do_sendmsg(fd, &msg, cfg_zerocopy, domain); + if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit) + do_recv_completions(fd, domain); + while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) do_recv_completions(fd, domain); @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv) if (cfg_ifindex == 0) error(1, errno, "invalid iface: %s", optarg); break; + case 'l': + cfg_notification_limit = strtoul(optarg, NULL, 0); + break; case 'm': cfg_cork_mixed = true; break; diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk index b909bee3cb2a3..abb9e58d95c46 100644 --- a/tools/testing/selftests/powerpc/flags.mk +++ b/tools/testing/selftests/powerpc/flags.mk @@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") export GIT_VERSION endif -ifeq ($(CFLAGS),) -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) export CFLAGS -endif - diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index c7686fb6641a7..55315ed695f47 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -291,11 +291,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param return ret; } +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + unsigned int eax, ebx, ecx, edx; + + /* AMD always supports non-contiguous CBM. 
*/ + if (get_vendor() == ARCH_AMD) + return true; + + /* Intel support for non-contiguous CBM needs to be discovered. */ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +} + static int noncont_cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) { unsigned long full_cache_mask, cont_mask, noncont_mask; - unsigned int eax, ebx, ecx, edx, sparse_masks; + unsigned int sparse_masks; int bit_center, ret; char schemata[64]; @@ -304,15 +323,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test, if (ret) return ret; - if (!strcmp(test->resource, "L3")) - __cpuid_count(0x10, 1, eax, ebx, ecx, edx); - else if (!strcmp(test->resource, "L2")) - __cpuid_count(0x10, 2, eax, ebx, ecx, edx); - else - return -EINVAL; - - if (sparse_masks != ((ecx >> 3) & 1)) { - ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n"); + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); return 1; } diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c index 62397d5934f13..ed351a1cb917d 100644 --- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) asm(".option push \n\ .option arch, +v \n\ - vsetivli x0, 1, e32, ta, ma \n\ + vsetivli x0, 1, e32, m1, ta, ma \n\ vmv.s.x v0, %1 \n\ # Generate SIGSEGV \n\ lw a0, 0(x0) \n\ diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index e40dc5be2f668..d12ff955de0d8 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } return 0; @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) /* Check that a child process is in the new timens. */ for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) return pr_fail("%ld %ld\n", now.tv_sec + OFFSET, tst.tv_sec); }
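The abs() to labs()/llabs() switches in these timens tests are not cosmetic: the operands are long (tv_sec deltas) or long long (the timerfd "elapsed" below), while abs() takes and returns int, so a wildly wrong clock could be truncated into a small value that still passes the "> 5" sanity check. A standalone illustration with a made-up offset (not from the patch), assuming a typical LP64 target:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            long diff = 0x100000001L;       /* ~4.29e9 seconds off */

            /* The cast makes explicit what the implicit long-to-int
             * conversion in abs(diff) would do on LP64: keep only the
             * low 32 bits, here yielding 1.
             */
            printf("abs():  %d\n", abs((int)diff));  /* 1: passes "> 5" */
            printf("labs(): %ld\n", labs(diff));     /* 4294967297: fails */
            return 0;
    }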
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5e7f0051bd7be..5b939f59dfa4d 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime"); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index 9edd43d6b2c13..a4196bbd6e33f 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime(%d)", clockid); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index beb7614941fb1..5b8907bf451dd 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -32,7 +32,7 @@ static void *tcheck(void *_args) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) { + if (labs(tst.tv_sec - now->tv_sec) > 5) { pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", args->tst_name, tst.tv_sec, now->tv_sec); return (void *)1UL; @@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) + if (labs(tst.tv_sec - now->tv_sec) > 5) return pr_fail("%s: unexpected value: %ld (%ld)\n", tst_name, tst.tv_sec, now->tv_sec); } diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008f94..98d8ba2afa008 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness +TEST_GEN_PROGS += vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi:
parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) $(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 413f75620a35b..4ae417372e9eb 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -55,14 +55,20 @@ static struct vdso_info ELF(Verdef) *verdef; } vdso_info; -/* Straight from the ELF specification. */ -static unsigned long elf_hash(const unsigned char *name) +/* + * Straight from the ELF specification...and then tweaked slightly, in order to + * avoid a few clang warnings. + */ +static unsigned long elf_hash(const char *name) { unsigned long h = 0, g; - while (*name) + const unsigned char *uch_name = (const unsigned char *)name; + + while (*uch_name) { - h = (h << 4) + *name++; - if (g = h & 0xf0000000) + h = (h << 4) + *uch_name++; + g = h & 0xf0000000; + if (g) h ^= g >> 24; h &= ~g; } diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 8a44ff973ee17..27f6fdf119691 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -18,7 +18,7 @@ #include "parse_vdso.h" -/* We need a libc functions... */ +/* We need some libc functions... */ int strcmp(const char *a, const char *b) { /* This implementation is buggy: it never returns -1. */ @@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b) return 0; } +/* + * The clang build needs this, although gcc does not. + * Stolen from lib/string.c. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + /* ...and two syscalls. This is x86-specific. 
*/ static inline long x86_syscall3(long nr, long a0, long a1, long a2) { @@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n) } } -__attribute__((externally_visible)) void c_main(void **stack) +void c_main(void **stack) { /* Parse the stack */ long argc = (long)*stack; diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e95bd56b332f7..35856b11c1435 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu max -machine microvm -no-acpi +QEMU_MACHINE := -cpu max -machine microvm,acpi=off endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl @@ -120,9 +120,9 @@ KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi +QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl